aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2008-02-03 02:29:41 -0500
committerDavid Woodhouse <dwmw2@infradead.org>2008-02-03 02:30:32 -0500
commitc1f3ee120bb61045b1c0a3ead620d1d65af47130 (patch)
tree908430bf2b47fe8e96ac623ae7ab6dd5698d0938 /net
parente619a75ff6201b567a539e787aa9af9bc63a3187 (diff)
parent9135f1901ee6449dfe338adf6e40e9c2025b8150 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'net')
-rw-r--r--net/802/Makefile3
-rw-r--r--net/802/sysctl_net_802.c33
-rw-r--r--net/802/tr.c33
-rw-r--r--net/8021q/vlan.c438
-rw-r--r--net/8021q/vlan.h41
-rw-r--r--net/8021q/vlan_dev.c510
-rw-r--r--net/8021q/vlan_netlink.c17
-rw-r--r--net/8021q/vlanproc.c109
-rw-r--r--net/8021q/vlanproc.h11
-rw-r--r--net/9p/conv.c5
-rw-r--r--net/9p/mod.c4
-rw-r--r--net/9p/trans_fd.c3
-rw-r--r--net/Kconfig13
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/aarp.c6
-rw-r--r--net/appletalk/atalk_proc.c6
-rw-r--r--net/appletalk/ddp.c7
-rw-r--r--net/appletalk/sysctl_net_atalk.c24
-rw-r--r--net/atm/Kconfig13
-rw-r--r--net/atm/atm_sysfs.c66
-rw-r--r--net/atm/br2684.c296
-rw-r--r--net/atm/clip.c33
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/lec.c7
-rw-r--r--net/atm/mpc.c7
-rw-r--r--net/atm/proc.c6
-rw-r--r--net/ax25/af_ax25.c79
-rw-r--r--net/ax25/ax25_ds_timer.c2
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/ax25/ax25_std_timer.c4
-rw-r--r--net/ax25/ax25_subr.c2
-rw-r--r--net/ax25/ax25_uid.c6
-rw-r--r--net/ax25/sysctl_net_ax25.c27
-rw-r--r--net/bluetooth/bnep/sock.c6
-rw-r--r--net/bluetooth/cmtp/sock.c6
-rw-r--r--net/bluetooth/hci_conn.c19
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hci_sysfs.c70
-rw-r--r--net/bluetooth/hidp/core.c5
-rw-r--r--net/bluetooth/hidp/sock.c12
-rw-r--r--net/bluetooth/l2cap.c15
-rw-r--r--net/bluetooth/rfcomm/core.c4
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/rfcomm/tty.c25
-rw-r--r--net/bluetooth/sco.c11
-rw-r--r--net/bridge/br.c4
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_if.c12
-rw-r--r--net/bridge/br_input.c9
-rw-r--r--net/bridge/br_netfilter.c92
-rw-r--r--net/bridge/br_netlink.c13
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_sysfs_br.c14
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/ebt_802_3.c10
-rw-r--r--net/bridge/netfilter/ebt_among.c29
-rw-r--r--net/bridge/netfilter/ebt_arp.c19
-rw-r--r--net/bridge/netfilter/ebt_arpreply.c17
-rw-r--r--net/bridge/netfilter/ebt_dnat.c8
-rw-r--r--net/bridge/netfilter/ebt_ip.c14
-rw-r--r--net/bridge/netfilter/ebt_limit.c6
-rw-r--r--net/bridge/netfilter/ebt_log.c22
-rw-r--r--net/bridge/netfilter/ebt_mark.c8
-rw-r--r--net/bridge/netfilter/ebt_mark_m.c8
-rw-r--r--net/bridge/netfilter/ebt_pkttype.c8
-rw-r--r--net/bridge/netfilter/ebt_redirect.c8
-rw-r--r--net/bridge/netfilter/ebt_snat.c11
-rw-r--r--net/bridge/netfilter/ebt_stp.c28
-rw-r--r--net/bridge/netfilter/ebt_ulog.c16
-rw-r--r--net/bridge/netfilter/ebt_vlan.c14
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/can/Kconfig44
-rw-r--r--net/can/Makefile12
-rw-r--r--net/can/af_can.c861
-rw-r--r--net/can/af_can.h122
-rw-r--r--net/can/bcm.c1561
-rw-r--r--net/can/proc.c533
-rw-r--r--net/can/raw.c763
-rw-r--r--net/compat.c108
-rw-r--r--net/core/datagram.c54
-rw-r--r--net/core/dev.c333
-rw-r--r--net/core/dev_mcast.c69
-rw-r--r--net/core/dst.c12
-rw-r--r--net/core/fib_rules.c124
-rw-r--r--net/core/flow.c7
-rw-r--r--net/core/gen_estimator.c23
-rw-r--r--net/core/gen_stats.c10
-rw-r--r--net/core/neighbour.c264
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/net_namespace.c191
-rw-r--r--net/core/netpoll.c93
-rw-r--r--net/core/pktgen.c109
-rw-r--r--net/core/request_sock.c34
-rw-r--r--net/core/rtnetlink.c61
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/skbuff.c304
-rw-r--r--net/core/sock.c279
-rw-r--r--net/core/stream.c85
-rw-r--r--net/core/sysctl_net_core.c70
-rw-r--r--net/core/utils.c27
-rw-r--r--net/dccp/Kconfig1
-rw-r--r--net/dccp/ackvec.c163
-rw-r--r--net/dccp/ackvec.h62
-rw-r--r--net/dccp/ccid.c8
-rw-r--r--net/dccp/ccid.h37
-rw-r--r--net/dccp/ccids/Kconfig30
-rw-r--r--net/dccp/ccids/ccid2.c228
-rw-r--r--net/dccp/ccids/ccid2.h21
-rw-r--r--net/dccp/ccids/ccid3.c710
-rw-r--r--net/dccp/ccids/ccid3.h41
-rw-r--r--net/dccp/ccids/lib/Makefile2
-rw-r--r--net/dccp/ccids/lib/loss_interval.c352
-rw-r--r--net/dccp/ccids/lib/loss_interval.h64
-rw-r--r--net/dccp/ccids/lib/packet_history.c599
-rw-r--r--net/dccp/ccids/lib/packet_history.h220
-rw-r--r--net/dccp/ccids/lib/tfrc.c63
-rw-r--r--net/dccp/ccids/lib/tfrc.h29
-rw-r--r--net/dccp/dccp.h35
-rw-r--r--net/dccp/feat.c29
-rw-r--r--net/dccp/feat.h26
-rw-r--r--net/dccp/input.c155
-rw-r--r--net/dccp/ipv4.c21
-rw-r--r--net/dccp/ipv6.c21
-rw-r--r--net/dccp/minisocks.c33
-rw-r--r--net/dccp/options.c139
-rw-r--r--net/dccp/output.c55
-rw-r--r--net/dccp/proto.c203
-rw-r--r--net/dccp/sysctl.c36
-rw-r--r--net/dccp/timer.c5
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/dn_dev.c74
-rw-r--r--net/decnet/dn_fib.c10
-rw-r--r--net/decnet/dn_neigh.c6
-rw-r--r--net/decnet/dn_nsp_out.c2
-rw-r--r--net/decnet/dn_route.c59
-rw-r--r--net/decnet/dn_rules.c16
-rw-r--r--net/decnet/dn_table.c8
-rw-r--r--net/decnet/netfilter/Kconfig1
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c6
-rw-r--r--net/decnet/sysctl_net_decnet.c23
-rw-r--r--net/econet/af_econet.c5
-rw-r--r--net/ethernet/eth.c30
-rw-r--r--net/ieee80211/Kconfig5
-rw-r--r--net/ieee80211/ieee80211_crypt_ccmp.c3
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c27
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c1
-rw-r--r--net/ieee80211/ieee80211_module.c5
-rw-r--r--net/ieee80211/ieee80211_rx.c51
-rw-r--r--net/ieee80211/ieee80211_tx.c15
-rw-r--r--net/ieee80211/ieee80211_wx.c2
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_auth.c6
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_io.c10
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_wx.c2
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c48
-rw-r--r--net/ipv4/ah4.c20
-rw-r--r--net/ipv4/arp.c199
-rw-r--r--net/ipv4/cipso_ipv4.c61
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c453
-rw-r--r--net/ipv4/esp4.c546
-rw-r--r--net/ipv4/fib_frontend.c293
-rw-r--r--net/ipv4/fib_hash.c192
-rw-r--r--net/ipv4/fib_lookup.h16
-rw-r--r--net/ipv4/fib_rules.c114
-rw-r--r--net/ipv4/fib_semantics.c172
-rw-r--r--net/ipv4/fib_trie.c937
-rw-r--r--net/ipv4/icmp.c131
-rw-r--r--net/ipv4/igmp.c56
-rw-r--r--net/ipv4/inet_connection_sock.c27
-rw-r--r--net/ipv4/inet_diag.c85
-rw-r--r--net/ipv4/inet_fragment.c85
-rw-r--r--net/ipv4/inet_hashtables.c153
-rw-r--r--net/ipv4/inet_lro.c19
-rw-r--r--net/ipv4/inet_timewait_sock.c36
-rw-r--r--net/ipv4/inetpeer.c42
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c202
-rw-r--r--net/ipv4/ip_gre.c138
-rw-r--r--net/ipv4/ip_input.c22
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c96
-rw-r--r--net/ipv4/ip_sockglue.c41
-rw-r--r--net/ipv4/ipcomp.c31
-rw-r--r--net/ipv4/ipconfig.c74
-rw-r--r--net/ipv4/ipip.c72
-rw-r--r--net/ipv4/ipmr.c35
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c9
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c91
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c138
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c88
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c45
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c121
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c16
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c28
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c61
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c16
-rw-r--r--net/ipv4/netfilter.c43
-rw-r--r--net/ipv4/netfilter/Kconfig89
-rw-r--r--net/ipv4/netfilter/Makefile21
-rw-r--r--net/ipv4/netfilter/arp_tables.c1047
-rw-r--r--net/ipv4/netfilter/arptable_filter.c33
-rw-r--r--net/ipv4/netfilter/ip_queue.c263
-rw-r--r--net/ipv4/netfilter/ip_tables.c654
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c62
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c43
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c45
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c43
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c52
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c47
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c155
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c179
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c87
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c40
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c52
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c115
-rw-r--r--net/ipv4/netfilter/ipt_ah.c39
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c35
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c79
-rw-r--r--net/ipv4/netfilter/ipt_owner.c92
-rw-r--r--net/ipv4/netfilter/ipt_recent.c47
-rw-r--r--net/ipv4/netfilter/ipt_tos.c55
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c26
-rw-r--r--net/ipv4/netfilter/iptable_filter.c57
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c75
-rw-r--r--net/ipv4/netfilter/iptable_raw.c51
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c54
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c45
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c52
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c117
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c49
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c32
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c24
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c19
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c54
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c4
-rw-r--r--net/ipv4/proc.c28
-rw-r--r--net/ipv4/raw.c235
-rw-r--r--net/ipv4/route.c633
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c171
-rw-r--r--net/ipv4/tcp.c229
-rw-r--r--net/ipv4/tcp_bic.c3
-rw-r--r--net/ipv4/tcp_cong.c23
-rw-r--r--net/ipv4/tcp_cubic.c3
-rw-r--r--net/ipv4/tcp_highspeed.c3
-rw-r--r--net/ipv4/tcp_htcp.c3
-rw-r--r--net/ipv4/tcp_hybla.c5
-rw-r--r--net/ipv4/tcp_illinois.c5
-rw-r--r--net/ipv4/tcp_input.c1331
-rw-r--r--net/ipv4/tcp_ipv4.c60
-rw-r--r--net/ipv4/tcp_lp.c4
-rw-r--r--net/ipv4/tcp_output.c674
-rw-r--r--net/ipv4/tcp_scalable.c3
-rw-r--r--net/ipv4/tcp_timer.c43
-rw-r--r--net/ipv4/tcp_vegas.c44
-rw-r--r--net/ipv4/tcp_veno.c7
-rw-r--r--net/ipv4/tcp_yeah.c3
-rw-r--r--net/ipv4/tunnel4.c24
-rw-r--r--net/ipv4/udp.c134
-rw-r--r--net/ipv4/udplite.c5
-rw-r--r--net/ipv4/xfrm4_input.c134
-rw-r--r--net/ipv4/xfrm4_mode_beet.c62
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c93
-rw-r--r--net/ipv4/xfrm4_output.c95
-rw-r--r--net/ipv4/xfrm4_policy.c220
-rw-r--r--net/ipv4/xfrm4_state.c20
-rw-r--r--net/ipv4/xfrm4_tunnel.c4
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/Makefile5
-rw-r--r--net/ipv6/addrconf.c591
-rw-r--r--net/ipv6/addrlabel.c561
-rw-r--r--net/ipv6/af_inet6.c186
-rw-r--r--net/ipv6/ah6.c19
-rw-r--r--net/ipv6/anycast.c4
-rw-r--r--net/ipv6/datagram.c11
-rw-r--r--net/ipv6/esp6.c508
-rw-r--r--net/ipv6/exthdrs.c77
-rw-r--r--net/ipv6/fib6_rules.c55
-rw-r--r--net/ipv6/icmp.c100
-rw-r--r--net/ipv6/inet6_hashtables.c158
-rw-r--r--net/ipv6/ip6_fib.c43
-rw-r--r--net/ipv6/ip6_flowlabel.c32
-rw-r--r--net/ipv6/ip6_input.c15
-rw-r--r--net/ipv6/ip6_output.c104
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ipcomp6.c30
-rw-r--r--net/ipv6/ipv6_sockglue.c23
-rw-r--r--net/ipv6/mcast.c24
-rw-r--r--net/ipv6/mip6.c29
-rw-r--r--net/ipv6/ndisc.c43
-rw-r--r--net/ipv6/netfilter.c26
-rw-r--r--net/ipv6/netfilter/Kconfig60
-rw-r--r--net/ipv6/netfilter/Makefile27
-rw-r--r--net/ipv6/netfilter/ip6_queue.c267
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1269
-rw-r--r--net/ipv6/netfilter/ip6t_HL.c39
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c45
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c49
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c39
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c36
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c40
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c44
-rw-r--r--net/ipv6/netfilter/ip6t_hl.c26
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c40
-rw-r--r--net/ipv6/netfilter/ip6t_mh.c39
-rw-r--r--net/ipv6/netfilter/ip6t_owner.c92
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c39
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c57
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c75
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c47
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c57
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c51
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c79
-rw-r--r--net/ipv6/proc.c26
-rw-r--r--net/ipv6/raw.c270
-rw-r--r--net/ipv6/reassembly.c181
-rw-r--r--net/ipv6/route.c355
-rw-r--r--net/ipv6/sit.c149
-rw-r--r--net/ipv6/sysctl_net_ipv6.c163
-rw-r--r--net/ipv6/tcp_ipv6.c91
-rw-r--r--net/ipv6/udp.c108
-rw-r--r--net/ipv6/udp_impl.h1
-rw-r--r--net/ipv6/udplite.c30
-rw-r--r--net/ipv6/xfrm6_input.c183
-rw-r--r--net/ipv6/xfrm6_mode_beet.c48
-rw-r--r--net/ipv6/xfrm6_mode_ro.c1
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c73
-rw-r--r--net/ipv6/xfrm6_output.c93
-rw-r--r--net/ipv6/xfrm6_policy.c278
-rw-r--r--net/ipv6/xfrm6_state.c25
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/ipx/af_ipx.c24
-rw-r--r--net/ipx/sysctl_net_ipx.c24
-rw-r--r--net/irda/af_irda.c39
-rw-r--r--net/irda/ircomm/ircomm_core.c12
-rw-r--r--net/irda/ircomm/ircomm_param.c2
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/irda/irda_device.c169
-rw-r--r--net/irda/iriap.c4
-rw-r--r--net/irda/irlan/irlan_eth.c4
-rw-r--r--net/irda/irlap_event.c25
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/irda/irlmp.c8
-rw-r--r--net/irda/irlmp_event.c4
-rw-r--r--net/irda/irnet/irnet_ppp.c10
-rw-r--r--net/irda/irsysctl.c56
-rw-r--r--net/irda/parameters.c18
-rw-r--r--net/irda/wrapper.c2
-rw-r--r--net/iucv/af_iucv.c11
-rw-r--r--net/iucv/iucv.c111
-rw-r--r--net/key/af_key.c60
-rw-r--r--net/lapb/lapb_iface.c2
-rw-r--r--net/llc/llc_conn.c22
-rw-r--r--net/llc/llc_station.c5
-rw-r--r--net/llc/sysctl_net_llc.c24
-rw-r--r--net/mac80211/Kconfig87
-rw-r--r--net/mac80211/Makefile36
-rw-r--r--net/mac80211/aes_ccm.c1
-rw-r--r--net/mac80211/cfg.c552
-rw-r--r--net/mac80211/debugfs_netdev.c60
-rw-r--r--net/mac80211/ieee80211.c233
-rw-r--r--net/mac80211/ieee80211_common.h91
-rw-r--r--net/mac80211/ieee80211_i.h103
-rw-r--r--net/mac80211/ieee80211_iface.c17
-rw-r--r--net/mac80211/ieee80211_ioctl.c139
-rw-r--r--net/mac80211/ieee80211_led.c35
-rw-r--r--net/mac80211/ieee80211_led.h6
-rw-r--r--net/mac80211/ieee80211_rate.c82
-rw-r--r--net/mac80211/ieee80211_rate.h105
-rw-r--r--net/mac80211/ieee80211_sta.c767
-rw-r--r--net/mac80211/key.c6
-rw-r--r--net/mac80211/rc80211_pid.h285
-rw-r--r--net/mac80211/rc80211_pid_algo.c549
-rw-r--r--net/mac80211/rc80211_pid_debugfs.c223
-rw-r--r--net/mac80211/rc80211_simple.c89
-rw-r--r--net/mac80211/rx.c785
-rw-r--r--net/mac80211/sta_info.c56
-rw-r--r--net/mac80211/sta_info.h50
-rw-r--r--net/mac80211/tx.c370
-rw-r--r--net/mac80211/util.c132
-rw-r--r--net/mac80211/wep.c15
-rw-r--r--net/mac80211/wme.c27
-rw-r--r--net/mac80211/wpa.c32
-rw-r--r--net/netfilter/Kconfig182
-rw-r--r--net/netfilter/Makefile20
-rw-r--r--net/netfilter/core.c47
-rw-r--r--net/netfilter/nf_conntrack_core.c256
-rw-r--r--net/netfilter/nf_conntrack_expect.c65
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c164
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c72
-rw-r--r--net/netfilter/nf_conntrack_h323_types.c346
-rw-r--r--net/netfilter/nf_conntrack_helper.c60
-rw-r--r--net/netfilter/nf_conntrack_irc.c2
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c332
-rw-r--r--net/netfilter/nf_conntrack_pptp.c14
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c334
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c288
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c33
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c32
-rw-r--r--net/netfilter/nf_conntrack_sane.c9
-rw-r--r--net/netfilter/nf_conntrack_sip.c39
-rw-r--r--net/netfilter/nf_conntrack_standalone.c82
-rw-r--r--net/netfilter/nf_conntrack_tftp.c5
-rw-r--r--net/netfilter/nf_log.c14
-rw-r--r--net/netfilter/nf_queue.c183
-rw-r--r--net/netfilter/nf_sockopt.c117
-rw-r--r--net/netfilter/nf_sysctl.c134
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_log.c205
-rw-r--r--net/netfilter/nfnetlink_queue.c601
-rw-r--r--net/netfilter/x_tables.c384
-rw-r--r--net/netfilter/xt_CLASSIFY.c44
-rw-r--r--net/netfilter/xt_CONNMARK.c158
-rw-r--r--net/netfilter/xt_CONNSECMARK.c64
-rw-r--r--net/netfilter/xt_DSCP.c167
-rw-r--r--net/netfilter/xt_MARK.c158
-rw-r--r--net/netfilter/xt_NFLOG.c39
-rw-r--r--net/netfilter/xt_NFQUEUE.c32
-rw-r--r--net/netfilter/xt_NOTRACK.c29
-rw-r--r--net/netfilter/xt_RATEEST.c205
-rw-r--r--net/netfilter/xt_SECMARK.c53
-rw-r--r--net/netfilter/xt_TCPMSS.c160
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c147
-rw-r--r--net/netfilter/xt_TRACE.c29
-rw-r--r--net/netfilter/xt_comment.c33
-rw-r--r--net/netfilter/xt_connbytes.c56
-rw-r--r--net/netfilter/xt_connlimit.c86
-rw-r--r--net/netfilter/xt_connmark.c129
-rw-r--r--net/netfilter/xt_conntrack.c277
-rw-r--r--net/netfilter/xt_dccp.c43
-rw-r--r--net/netfilter/xt_dscp.c112
-rw-r--r--net/netfilter/xt_esp.c43
-rw-r--r--net/netfilter/xt_hashlimit.c404
-rw-r--r--net/netfilter/xt_helper.c60
-rw-r--r--net/netfilter/xt_iprange.c180
-rw-r--r--net/netfilter/xt_length.c45
-rw-r--r--net/netfilter/xt_limit.c57
-rw-r--r--net/netfilter/xt_mac.c43
-rw-r--r--net/netfilter/xt_mark.c100
-rw-r--r--net/netfilter/xt_multiport.c100
-rw-r--r--net/netfilter/xt_owner.c213
-rw-r--r--net/netfilter/xt_physdev.c51
-rw-r--r--net/netfilter/xt_pkttype.c51
-rw-r--r--net/netfilter/xt_policy.c67
-rw-r--r--net/netfilter/xt_quota.c37
-rw-r--r--net/netfilter/xt_rateest.c178
-rw-r--r--net/netfilter/xt_realm.c34
-rw-r--r--net/netfilter/xt_sctp.c43
-rw-r--r--net/netfilter/xt_state.c52
-rw-r--r--net/netfilter/xt_statistic.c42
-rw-r--r--net/netfilter/xt_string.c54
-rw-r--r--net/netfilter/xt_tcpmss.c33
-rw-r--r--net/netfilter/xt_tcpudp.c79
-rw-r--r--net/netfilter/xt_time.c45
-rw-r--r--net/netfilter/xt_u32.c34
-rw-r--r--net/netlabel/netlabel_cipso_v4.c5
-rw-r--r--net/netlabel/netlabel_domainhash.c77
-rw-r--r--net/netlabel/netlabel_kapi.c21
-rw-r--r--net/netlabel/netlabel_mgmt.c63
-rw-r--r--net/netlabel/netlabel_mgmt.h7
-rw-r--r--net/netlabel/netlabel_unlabeled.c1565
-rw-r--r--net/netlabel/netlabel_unlabeled.h145
-rw-r--r--net/netlink/af_netlink.c205
-rw-r--r--net/netlink/attr.c19
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_dev.c2
-rw-r--r--net/netrom/nr_timer.c19
-rw-r--r--net/netrom/sysctl_net_netrom.c24
-rw-r--r--net/packet/af_packet.c148
-rw-r--r--net/rfkill/rfkill-input.c9
-rw-r--r--net/rfkill/rfkill.c59
-rw-r--r--net/rose/af_rose.c19
-rw-r--r--net/rose/rose_dev.c2
-rw-r--r--net/rose/rose_in.c2
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rose/sysctl_net_rose.c24
-rw-r--r--net/rxrpc/Kconfig1
-rw-r--r--net/rxrpc/af_rxrpc.c4
-rw-r--r--net/rxrpc/ar-call.c2
-rw-r--r--net/rxrpc/ar-connection.c2
-rw-r--r--net/rxrpc/ar-input.c8
-rw-r--r--net/rxrpc/ar-internal.h6
-rw-r--r--net/rxrpc/ar-local.c4
-rw-r--r--net/rxrpc/ar-peer.c2
-rw-r--r--net/rxrpc/ar-proc.c6
-rw-r--r--net/rxrpc/rxkad.c14
-rw-r--r--net/sched/Kconfig22
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c340
-rw-r--r--net/sched/act_gact.c40
-rw-r--r--net/sched/act_ipt.c55
-rw-r--r--net/sched/act_mirred.c27
-rw-r--r--net/sched/act_nat.c39
-rw-r--r--net/sched/act_pedit.c29
-rw-r--r--net/sched/act_police.c67
-rw-r--r--net/sched/act_simple.c33
-rw-r--r--net/sched/cls_api.c193
-rw-r--r--net/sched/cls_basic.c59
-rw-r--r--net/sched/cls_flow.c660
-rw-r--r--net/sched/cls_fw.c70
-rw-r--r--net/sched/cls_route.c86
-rw-r--r--net/sched/cls_rsvp.h69
-rw-r--r--net/sched/cls_tcindex.c143
-rw-r--r--net/sched/cls_u32.c119
-rw-r--r--net/sched/em_meta.c64
-rw-r--r--net/sched/em_text.c9
-rw-r--r--net/sched/ematch.c98
-rw-r--r--net/sched/sch_api.c134
-rw-r--r--net/sched/sch_atm.c208
-rw-r--r--net/sched/sch_blackhole.c2
-rw-r--r--net/sched/sch_cbq.c183
-rw-r--r--net/sched/sch_dsmark.c276
-rw-r--r--net/sched/sch_fifo.c17
-rw-r--r--net/sched/sch_generic.c51
-rw-r--r--net/sched/sch_gred.c77
-rw-r--r--net/sched/sch_hfsc.c94
-rw-r--r--net/sched/sch_htb.c112
-rw-r--r--net/sched/sch_ingress.c319
-rw-r--r--net/sched/sch_netem.c143
-rw-r--r--net/sched/sch_prio.c41
-rw-r--r--net/sched/sch_red.c64
-rw-r--r--net/sched/sch_sfq.c200
-rw-r--r--net/sched/sch_tbf.c68
-rw-r--r--net/sched/sch_teql.c7
-rw-r--r--net/sctp/Kconfig7
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/associola.c93
-rw-r--r--net/sctp/auth.c8
-rw-r--r--net/sctp/bind_addr.c74
-rw-r--r--net/sctp/crc32c.c222
-rw-r--r--net/sctp/endpointola.c35
-rw-r--r--net/sctp/input.c168
-rw-r--r--net/sctp/inqueue.c4
-rw-r--r--net/sctp/ipv6.c9
-rw-r--r--net/sctp/output.c1
-rw-r--r--net/sctp/outqueue.c70
-rw-r--r--net/sctp/proc.c6
-rw-r--r--net/sctp/protocol.c38
-rw-r--r--net/sctp/sm_make_chunk.c335
-rw-r--r--net/sctp/sm_sideeffect.c10
-rw-r--r--net/sctp/sm_statefuns.c186
-rw-r--r--net/sctp/sm_statetable.c18
-rw-r--r--net/sctp/socket.c62
-rw-r--r--net/sctp/sysctl.c29
-rw-r--r--net/sctp/transport.c18
-rw-r--r--net/sctp/ulpevent.c4
-rw-r--r--net/sctp/ulpqueue.c37
-rw-r--r--net/socket.c66
-rw-r--r--net/sunrpc/Makefile3
-rw-r--r--net/sunrpc/auth.c10
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c19
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c8
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c93
-rw-r--r--net/sunrpc/cache.c154
-rw-r--r--net/sunrpc/clnt.c217
-rw-r--r--net/sunrpc/rpc_pipe.c61
-rw-r--r--net/sunrpc/rpcb_clnt.c122
-rw-r--r--net/sunrpc/sched.c124
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/stats.c15
-rw-r--r--net/sunrpc/sunrpc_syms.c118
-rw-r--r--net/sunrpc/svc.c91
-rw-r--r--net/sunrpc/svc_xprt.c1055
-rw-r--r--net/sunrpc/svcauth.c6
-rw-r--r--net/sunrpc/svcauth_unix.c59
-rw-r--r--net/sunrpc/svcsock.c1311
-rw-r--r--net/sunrpc/sysctl.c41
-rw-r--r--net/sunrpc/xdr.c24
-rw-r--r--net/sunrpc/xprt.c61
-rw-r--r--net/sunrpc/xprtrdma/Makefile5
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c55
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c266
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c412
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c586
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c520
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1080
-rw-r--r--net/sunrpc/xprtrdma/transport.c39
-rw-r--r--net/sunrpc/xprtrdma/verbs.c8
-rw-r--r--net/sunrpc/xprtsock.c236
-rw-r--r--net/sysctl_net.c79
-rw-r--r--net/tipc/core.h4
-rw-r--r--net/tipc/port.c20
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/unix/af_unix.c191
-rw-r--r--net/unix/garbage.c26
-rw-r--r--net/unix/sysctl_net_unix.c52
-rw-r--r--net/wireless/Kconfig10
-rw-r--r--net/wireless/core.c3
-rw-r--r--net/wireless/nl80211.c737
-rw-r--r--net/wireless/wext.c45
-rw-r--r--net/x25/af_x25.c8
-rw-r--r--net/x25/sysctl_net_x25.c24
-rw-r--r--net/x25/x25_facilities.c4
-rw-r--r--net/x25/x25_forward.c7
-rw-r--r--net/x25/x25_in.c2
-rw-r--r--net/x25/x25_link.c7
-rw-r--r--net/x25/x25_proc.c8
-rw-r--r--net/x25/x25_route.c2
-rw-r--r--net/x25/x25_subr.c2
-rw-r--r--net/x25/x25_timer.c4
-rw-r--r--net/xfrm/Kconfig11
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_algo.c147
-rw-r--r--net/xfrm/xfrm_hash.c9
-rw-r--r--net/xfrm/xfrm_input.c177
-rw-r--r--net/xfrm/xfrm_output.c155
-rw-r--r--net/xfrm/xfrm_policy.c441
-rw-r--r--net/xfrm/xfrm_proc.c97
-rw-r--r--net/xfrm/xfrm_state.c242
-rw-r--r--net/xfrm/xfrm_user.c90
639 files changed, 41962 insertions, 22427 deletions
diff --git a/net/802/Makefile b/net/802/Makefile
index 977704a54f68..68569ffddea1 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -3,9 +3,8 @@
3# 3#
4 4
5# Check the p8022 selections against net/core/Makefile. 5# Check the p8022 selections against net/core/Makefile.
6obj-$(CONFIG_SYSCTL) += sysctl_net_802.o
7obj-$(CONFIG_LLC) += p8022.o psnap.o 6obj-$(CONFIG_LLC) += p8022.o psnap.o
8obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o 7obj-$(CONFIG_TR) += p8022.o psnap.o tr.o
9obj-$(CONFIG_NET_FC) += fc.o 8obj-$(CONFIG_NET_FC) += fc.o
10obj-$(CONFIG_FDDI) += fddi.o 9obj-$(CONFIG_FDDI) += fddi.o
11obj-$(CONFIG_HIPPI) += hippi.o 10obj-$(CONFIG_HIPPI) += hippi.o
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
deleted file mode 100644
index ead56037398b..000000000000
--- a/net/802/sysctl_net_802.c
+++ /dev/null
@@ -1,33 +0,0 @@
1/* -*- linux-c -*-
2 * sysctl_net_802.c: sysctl interface to net 802 subsystem.
3 *
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/802 directory entry (empty =) ). [MS]
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/mm.h>
14#include <linux/if_tr.h>
15#include <linux/sysctl.h>
16
17#ifdef CONFIG_TR
18extern int sysctl_tr_rif_timeout;
19#endif
20
21struct ctl_table tr_table[] = {
22#ifdef CONFIG_TR
23 {
24 .ctl_name = NET_TR_RIF_TIMEOUT,
25 .procname = "rif_timeout",
26 .data = &sysctl_tr_rif_timeout,
27 .maxlen = sizeof(int),
28 .mode = 0644,
29 .proc_handler = &proc_dointvec
30 },
31#endif /* CONFIG_TR */
32 { 0 },
33};
diff --git a/net/802/tr.c b/net/802/tr.c
index a2bd0f2e3af8..18c66475d8c3 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -35,6 +35,7 @@
35#include <linux/proc_fs.h> 35#include <linux/proc_fs.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/sysctl.h>
38#include <net/arp.h> 39#include <net/arp.h>
39#include <net/net_namespace.h> 40#include <net/net_namespace.h>
40 41
@@ -75,7 +76,7 @@ static DEFINE_SPINLOCK(rif_lock);
75 76
76static struct timer_list rif_timer; 77static struct timer_list rif_timer;
77 78
78int sysctl_tr_rif_timeout = 60*10*HZ; 79static int sysctl_tr_rif_timeout = 60*10*HZ;
79 80
80static inline unsigned long rif_hash(const unsigned char *addr) 81static inline unsigned long rif_hash(const unsigned char *addr)
81{ 82{
@@ -634,6 +635,26 @@ struct net_device *alloc_trdev(int sizeof_priv)
634 return alloc_netdev(sizeof_priv, "tr%d", tr_setup); 635 return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
635} 636}
636 637
638#ifdef CONFIG_SYSCTL
639static struct ctl_table tr_table[] = {
640 {
641 .ctl_name = NET_TR_RIF_TIMEOUT,
642 .procname = "rif_timeout",
643 .data = &sysctl_tr_rif_timeout,
644 .maxlen = sizeof(int),
645 .mode = 0644,
646 .proc_handler = &proc_dointvec
647 },
648 { 0 },
649};
650
651static __initdata struct ctl_path tr_path[] = {
652 { .procname = "net", .ctl_name = CTL_NET, },
653 { .procname = "token-ring", .ctl_name = NET_TR, },
654 { }
655};
656#endif
657
637/* 658/*
638 * Called during bootup. We don't actually have to initialise 659 * Called during bootup. We don't actually have to initialise
639 * too much for this. 660 * too much for this.
@@ -641,12 +662,12 @@ struct net_device *alloc_trdev(int sizeof_priv)
641 662
642static int __init rif_init(void) 663static int __init rif_init(void)
643{ 664{
644 init_timer(&rif_timer); 665 rif_timer.expires = jiffies + sysctl_tr_rif_timeout;
645 rif_timer.expires = sysctl_tr_rif_timeout; 666 setup_timer(&rif_timer, rif_check_expire, 0);
646 rif_timer.data = 0L;
647 rif_timer.function = rif_check_expire;
648 add_timer(&rif_timer); 667 add_timer(&rif_timer);
649 668#ifdef CONFIG_SYSCTL
669 register_sysctl_paths(tr_path, tr_table);
670#endif
650 proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops); 671 proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
651 return 0; 672 return 0;
652} 673}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3fe4fc86055f..dbc81b965096 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -3,7 +3,7 @@
3 * Ethernet-type device handling. 3 * Ethernet-type device handling.
4 * 4 *
5 * Authors: Ben Greear <greearb@candelatech.com> 5 * Authors: Ben Greear <greearb@candelatech.com>
6 * Please send support related email to: vlan@scry.wanfear.com 6 * Please send support related email to: netdev@vger.kernel.org
7 * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html 7 * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
8 * 8 *
9 * Fixes: 9 * Fixes:
@@ -43,23 +43,12 @@
43 43
44/* Our listing of VLAN group(s) */ 44/* Our listing of VLAN group(s) */
45static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; 45static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
46#define vlan_grp_hashfn(IDX) ((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK)
47 46
48static char vlan_fullname[] = "802.1Q VLAN Support"; 47static char vlan_fullname[] = "802.1Q VLAN Support";
49static char vlan_version[] = DRV_VERSION; 48static char vlan_version[] = DRV_VERSION;
50static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>"; 49static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
51static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>"; 50static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
52 51
53static int vlan_device_event(struct notifier_block *, unsigned long, void *);
54static int vlan_ioctl_handler(struct net *net, void __user *);
55static int unregister_vlan_dev(struct net_device *, unsigned short );
56
57static struct notifier_block vlan_notifier_block = {
58 .notifier_call = vlan_device_event,
59};
60
61/* These may be changed at run-time through IOCTLs */
62
63/* Determines interface naming scheme. */ 52/* Determines interface naming scheme. */
64unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; 53unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
65 54
@@ -70,82 +59,11 @@ static struct packet_type vlan_packet_type = {
70 59
71/* End of global variables definitions. */ 60/* End of global variables definitions. */
72 61
73/* 62static inline unsigned int vlan_grp_hashfn(unsigned int idx)
74 * Function vlan_proto_init (pro)
75 *
76 * Initialize VLAN protocol layer,
77 *
78 */
79static int __init vlan_proto_init(void)
80{
81 int err;
82
83 printk(VLAN_INF "%s v%s %s\n",
84 vlan_fullname, vlan_version, vlan_copyright);
85 printk(VLAN_INF "All bugs added by %s\n",
86 vlan_buggyright);
87
88 /* proc file system initialization */
89 err = vlan_proc_init();
90 if (err < 0) {
91 printk(KERN_ERR
92 "%s %s: can't create entry in proc filesystem!\n",
93 __FUNCTION__, VLAN_NAME);
94 return err;
95 }
96
97 dev_add_pack(&vlan_packet_type);
98
99 /* Register us to receive netdevice events */
100 err = register_netdevice_notifier(&vlan_notifier_block);
101 if (err < 0)
102 goto err1;
103
104 err = vlan_netlink_init();
105 if (err < 0)
106 goto err2;
107
108 vlan_ioctl_set(vlan_ioctl_handler);
109 return 0;
110
111err2:
112 unregister_netdevice_notifier(&vlan_notifier_block);
113err1:
114 vlan_proc_cleanup();
115 dev_remove_pack(&vlan_packet_type);
116 return err;
117}
118
119/*
120 * Module 'remove' entry point.
121 * o delete /proc/net/router directory and static entries.
122 */
123static void __exit vlan_cleanup_module(void)
124{ 63{
125 int i; 64 return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
126
127 vlan_netlink_fini();
128 vlan_ioctl_set(NULL);
129
130 /* Un-register us from receiving netdevice events */
131 unregister_netdevice_notifier(&vlan_notifier_block);
132
133 dev_remove_pack(&vlan_packet_type);
134
135 /* This table must be empty if there are no module
136 * references left.
137 */
138 for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) {
139 BUG_ON(!hlist_empty(&vlan_group_hash[i]));
140 }
141 vlan_proc_cleanup();
142
143 synchronize_net();
144} 65}
145 66
146module_init(vlan_proto_init);
147module_exit(vlan_cleanup_module);
148
149/* Must be invoked with RCU read lock (no preempt) */ 67/* Must be invoked with RCU read lock (no preempt) */
150static struct vlan_group *__vlan_find_group(int real_dev_ifindex) 68static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
151{ 69{
@@ -180,7 +98,7 @@ static void vlan_group_free(struct vlan_group *grp)
180{ 98{
181 int i; 99 int i;
182 100
183 for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) 101 for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
184 kfree(grp->vlan_devices_arrays[i]); 102 kfree(grp->vlan_devices_arrays[i]);
185 kfree(grp); 103 kfree(grp);
186} 104}
@@ -218,173 +136,50 @@ static void vlan_rcu_free(struct rcu_head *rcu)
218 vlan_group_free(container_of(rcu, struct vlan_group, rcu)); 136 vlan_group_free(container_of(rcu, struct vlan_group, rcu));
219} 137}
220 138
221 139void unregister_vlan_dev(struct net_device *dev)
222/* This returns 0 if everything went fine.
223 * It will return 1 if the group was killed as a result.
224 * A negative return indicates failure.
225 *
226 * The RTNL lock must be held.
227 */
228static int unregister_vlan_dev(struct net_device *real_dev,
229 unsigned short vlan_id)
230{ 140{
231 struct net_device *dev = NULL; 141 struct vlan_dev_info *vlan = vlan_dev_info(dev);
232 int real_dev_ifindex = real_dev->ifindex; 142 struct net_device *real_dev = vlan->real_dev;
233 struct vlan_group *grp; 143 struct vlan_group *grp;
234 int i, ret; 144 unsigned short vlan_id = vlan->vlan_id;
235
236#ifdef VLAN_DEBUG
237 printk(VLAN_DBG "%s: VID: %i\n", __FUNCTION__, vlan_id);
238#endif
239
240 /* sanity check */
241 if (vlan_id >= VLAN_VID_MASK)
242 return -EINVAL;
243 145
244 ASSERT_RTNL(); 146 ASSERT_RTNL();
245 grp = __vlan_find_group(real_dev_ifindex);
246
247 ret = 0;
248 147
249 if (grp) { 148 grp = __vlan_find_group(real_dev->ifindex);
250 dev = vlan_group_get_device(grp, vlan_id); 149 BUG_ON(!grp);
251 if (dev) {
252 /* Remove proc entry */
253 vlan_proc_rem_dev(dev);
254
255 /* Take it out of our own structures, but be sure to
256 * interlock with HW accelerating devices or SW vlan
257 * input packet processing.
258 */
259 if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
260 real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
261
262 vlan_group_set_device(grp, vlan_id, NULL);
263 synchronize_net();
264 150
151 vlan_proc_rem_dev(dev);
265 152
266 /* Caller unregisters (and if necessary, puts) 153 /* Take it out of our own structures, but be sure to interlock with
267 * VLAN device, but we get rid of the reference to 154 * HW accelerating devices or SW vlan input packet processing.
268 * real_dev here. 155 */
269 */ 156 if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
270 dev_put(real_dev); 157 real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
271 158
272 /* If the group is now empty, kill off the 159 vlan_group_set_device(grp, vlan_id, NULL);
273 * group. 160 grp->nr_vlans--;
274 */
275 for (i = 0; i < VLAN_VID_MASK; i++)
276 if (vlan_group_get_device(grp, i))
277 break;
278 161
279 if (i == VLAN_VID_MASK) { 162 synchronize_net();
280 if (real_dev->features & NETIF_F_HW_VLAN_RX)
281 real_dev->vlan_rx_register(real_dev, NULL);
282 163
283 hlist_del_rcu(&grp->hlist); 164 /* If the group is now empty, kill off the group. */
165 if (grp->nr_vlans == 0) {
166 if (real_dev->features & NETIF_F_HW_VLAN_RX)
167 real_dev->vlan_rx_register(real_dev, NULL);
284 168
285 /* Free the group, after all cpu's are done. */ 169 hlist_del_rcu(&grp->hlist);
286 call_rcu(&grp->rcu, vlan_rcu_free);
287 170
288 grp = NULL; 171 /* Free the group, after all cpu's are done. */
289 ret = 1; 172 call_rcu(&grp->rcu, vlan_rcu_free);
290 }
291 }
292 } 173 }
293 174
294 return ret; 175 /* Get rid of the vlan's reference to real_dev */
295} 176 dev_put(real_dev);
296
297int unregister_vlan_device(struct net_device *dev)
298{
299 int ret;
300 177
301 ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
302 VLAN_DEV_INFO(dev)->vlan_id);
303 unregister_netdevice(dev); 178 unregister_netdevice(dev);
304
305 if (ret == 1)
306 ret = 0;
307 return ret;
308} 179}
309 180
310/* 181static void vlan_transfer_operstate(const struct net_device *dev,
311 * vlan network devices have devices nesting below it, and are a special 182 struct net_device *vlandev)
312 * "super class" of normal network devices; split their locks off into a
313 * separate class since they always nest.
314 */
315static struct lock_class_key vlan_netdev_xmit_lock_key;
316
317static const struct header_ops vlan_header_ops = {
318 .create = vlan_dev_hard_header,
319 .rebuild = vlan_dev_rebuild_header,
320 .parse = eth_header_parse,
321};
322
323static int vlan_dev_init(struct net_device *dev)
324{
325 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
326
327 /* IFF_BROADCAST|IFF_MULTICAST; ??? */
328 dev->flags = real_dev->flags & ~IFF_UP;
329 dev->iflink = real_dev->ifindex;
330 dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
331 (1<<__LINK_STATE_DORMANT))) |
332 (1<<__LINK_STATE_PRESENT);
333
334 /* ipv6 shared card related stuff */
335 dev->dev_id = real_dev->dev_id;
336
337 if (is_zero_ether_addr(dev->dev_addr))
338 memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
339 if (is_zero_ether_addr(dev->broadcast))
340 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
341
342 if (real_dev->features & NETIF_F_HW_VLAN_TX) {
343 dev->header_ops = real_dev->header_ops;
344 dev->hard_header_len = real_dev->hard_header_len;
345 dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
346 } else {
347 dev->header_ops = &vlan_header_ops;
348 dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
349 dev->hard_start_xmit = vlan_dev_hard_start_xmit;
350 }
351
352 lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
353 return 0;
354}
355
356void vlan_setup(struct net_device *new_dev)
357{
358 ether_setup(new_dev);
359
360 /* new_dev->ifindex = 0; it will be set when added to
361 * the global list.
362 * iflink is set as well.
363 */
364 new_dev->get_stats = vlan_dev_get_stats;
365
366 /* Make this thing known as a VLAN device */
367 new_dev->priv_flags |= IFF_802_1Q_VLAN;
368
369 /* Set us up to have no queue, as the underlying Hardware device
370 * can do all the queueing we could want.
371 */
372 new_dev->tx_queue_len = 0;
373
374 /* set up method calls */
375 new_dev->change_mtu = vlan_dev_change_mtu;
376 new_dev->init = vlan_dev_init;
377 new_dev->open = vlan_dev_open;
378 new_dev->stop = vlan_dev_stop;
379 new_dev->set_multicast_list = vlan_dev_set_multicast_list;
380 new_dev->change_rx_flags = vlan_change_rx_flags;
381 new_dev->destructor = free_netdev;
382 new_dev->do_ioctl = vlan_dev_ioctl;
383
384 memset(new_dev->broadcast, 0, ETH_ALEN);
385}
386
387static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
388{ 183{
389 /* Have to respect userspace enforced dormant state 184 /* Have to respect userspace enforced dormant state
390 * of real device, also must allow supplicant running 185 * of real device, also must allow supplicant running
@@ -406,23 +201,22 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
406 201
407int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) 202int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
408{ 203{
204 char *name = real_dev->name;
205
409 if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { 206 if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
410 printk(VLAN_DBG "%s: VLANs not supported on %s.\n", 207 pr_info("8021q: VLANs not supported on %s\n", name);
411 __FUNCTION__, real_dev->name);
412 return -EOPNOTSUPP; 208 return -EOPNOTSUPP;
413 } 209 }
414 210
415 if ((real_dev->features & NETIF_F_HW_VLAN_RX) && 211 if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
416 !real_dev->vlan_rx_register) { 212 !real_dev->vlan_rx_register) {
417 printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", 213 pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
418 __FUNCTION__, real_dev->name);
419 return -EOPNOTSUPP; 214 return -EOPNOTSUPP;
420 } 215 }
421 216
422 if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && 217 if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
423 (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { 218 (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
424 printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", 219 pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
425 __FUNCTION__, real_dev->name);
426 return -EOPNOTSUPP; 220 return -EOPNOTSUPP;
427 } 221 }
428 222
@@ -432,18 +226,15 @@ int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
432 if (!(real_dev->flags & IFF_UP)) 226 if (!(real_dev->flags & IFF_UP))
433 return -ENETDOWN; 227 return -ENETDOWN;
434 228
435 if (__find_vlan_dev(real_dev, vlan_id) != NULL) { 229 if (__find_vlan_dev(real_dev, vlan_id) != NULL)
436 /* was already registered. */
437 printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
438 return -EEXIST; 230 return -EEXIST;
439 }
440 231
441 return 0; 232 return 0;
442} 233}
443 234
444int register_vlan_dev(struct net_device *dev) 235int register_vlan_dev(struct net_device *dev)
445{ 236{
446 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 237 struct vlan_dev_info *vlan = vlan_dev_info(dev);
447 struct net_device *real_dev = vlan->real_dev; 238 struct net_device *real_dev = vlan->real_dev;
448 unsigned short vlan_id = vlan->vlan_id; 239 unsigned short vlan_id = vlan->vlan_id;
449 struct vlan_group *grp, *ngrp = NULL; 240 struct vlan_group *grp, *ngrp = NULL;
@@ -470,14 +261,16 @@ int register_vlan_dev(struct net_device *dev)
470 * it into our local structure. 261 * it into our local structure.
471 */ 262 */
472 vlan_group_set_device(grp, vlan_id, dev); 263 vlan_group_set_device(grp, vlan_id, dev);
264 grp->nr_vlans++;
265
473 if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) 266 if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
474 real_dev->vlan_rx_register(real_dev, ngrp); 267 real_dev->vlan_rx_register(real_dev, ngrp);
475 if (real_dev->features & NETIF_F_HW_VLAN_FILTER) 268 if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
476 real_dev->vlan_rx_add_vid(real_dev, vlan_id); 269 real_dev->vlan_rx_add_vid(real_dev, vlan_id);
477 270
478 if (vlan_proc_add_dev(dev) < 0) 271 if (vlan_proc_add_dev(dev) < 0)
479 printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", 272 pr_warning("8021q: failed to add proc entry for %s\n",
480 dev->name); 273 dev->name);
481 return 0; 274 return 0;
482 275
483out_free_group: 276out_free_group:
@@ -496,11 +289,6 @@ static int register_vlan_device(struct net_device *real_dev,
496 char name[IFNAMSIZ]; 289 char name[IFNAMSIZ];
497 int err; 290 int err;
498 291
499#ifdef VLAN_DEBUG
500 printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n",
501 __FUNCTION__, eth_IF_name, VLAN_ID);
502#endif
503
504 if (VLAN_ID >= VLAN_VID_MASK) 292 if (VLAN_ID >= VLAN_VID_MASK)
505 return -ERANGE; 293 return -ERANGE;
506 294
@@ -509,10 +297,6 @@ static int register_vlan_device(struct net_device *real_dev,
509 return err; 297 return err;
510 298
511 /* Gotta set up the fields for the device. */ 299 /* Gotta set up the fields for the device. */
512#ifdef VLAN_DEBUG
513 printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
514 vlan_name_type);
515#endif
516 switch (vlan_name_type) { 300 switch (vlan_name_type) {
517 case VLAN_NAME_TYPE_RAW_PLUS_VID: 301 case VLAN_NAME_TYPE_RAW_PLUS_VID:
518 /* name will look like: eth1.0005 */ 302 /* name will look like: eth1.0005 */
@@ -549,26 +333,16 @@ static int register_vlan_device(struct net_device *real_dev,
549 */ 333 */
550 new_dev->mtu = real_dev->mtu; 334 new_dev->mtu = real_dev->mtu;
551 335
552#ifdef VLAN_DEBUG 336 vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
553 printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); 337 vlan_dev_info(new_dev)->real_dev = real_dev;
554 VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n", 338 vlan_dev_info(new_dev)->dent = NULL;
555 new_dev->priv, 339 vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
556 sizeof(struct vlan_dev_info));
557#endif
558
559 VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
560 VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
561 VLAN_DEV_INFO(new_dev)->dent = NULL;
562 VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
563 340
564 new_dev->rtnl_link_ops = &vlan_link_ops; 341 new_dev->rtnl_link_ops = &vlan_link_ops;
565 err = register_vlan_dev(new_dev); 342 err = register_vlan_dev(new_dev);
566 if (err < 0) 343 if (err < 0)
567 goto out_free_newdev; 344 goto out_free_newdev;
568 345
569#ifdef VLAN_DEBUG
570 printk(VLAN_DBG "Allocated new device successfully, returning.\n");
571#endif
572 return 0; 346 return 0;
573 347
574out_free_newdev: 348out_free_newdev:
@@ -579,7 +353,7 @@ out_free_newdev:
579static void vlan_sync_address(struct net_device *dev, 353static void vlan_sync_address(struct net_device *dev,
580 struct net_device *vlandev) 354 struct net_device *vlandev)
581{ 355{
582 struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev); 356 struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
583 357
584 /* May be called without an actual change */ 358 /* May be called without an actual change */
585 if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr)) 359 if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
@@ -600,7 +374,8 @@ static void vlan_sync_address(struct net_device *dev,
600 memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); 374 memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
601} 375}
602 376
603static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 377static int vlan_device_event(struct notifier_block *unused, unsigned long event,
378 void *ptr)
604{ 379{
605 struct net_device *dev = ptr; 380 struct net_device *dev = ptr;
606 struct vlan_group *grp = __vlan_find_group(dev->ifindex); 381 struct vlan_group *grp = __vlan_find_group(dev->ifindex);
@@ -636,6 +411,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
636 if (!vlandev) 411 if (!vlandev)
637 continue; 412 continue;
638 413
414 flgs = vlandev->flags;
415 if (!(flgs & IFF_UP))
416 continue;
417
639 vlan_sync_address(dev, vlandev); 418 vlan_sync_address(dev, vlandev);
640 } 419 }
641 break; 420 break;
@@ -673,20 +452,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
673 case NETDEV_UNREGISTER: 452 case NETDEV_UNREGISTER:
674 /* Delete all VLANs for this dev. */ 453 /* Delete all VLANs for this dev. */
675 for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { 454 for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
676 int ret;
677
678 vlandev = vlan_group_get_device(grp, i); 455 vlandev = vlan_group_get_device(grp, i);
679 if (!vlandev) 456 if (!vlandev)
680 continue; 457 continue;
681 458
682 ret = unregister_vlan_dev(dev, 459 /* unregistration of last vlan destroys group, abort
683 VLAN_DEV_INFO(vlandev)->vlan_id); 460 * afterwards */
461 if (grp->nr_vlans == 1)
462 i = VLAN_GROUP_ARRAY_LEN;
684 463
685 unregister_netdevice(vlandev); 464 unregister_vlan_dev(vlandev);
686
687 /* Group was destroyed? */
688 if (ret == 1)
689 break;
690 } 465 }
691 break; 466 break;
692 } 467 }
@@ -695,6 +470,10 @@ out:
695 return NOTIFY_DONE; 470 return NOTIFY_DONE;
696} 471}
697 472
473static struct notifier_block vlan_notifier_block __read_mostly = {
474 .notifier_call = vlan_device_event,
475};
476
698/* 477/*
699 * VLAN IOCTL handler. 478 * VLAN IOCTL handler.
700 * o execute requested action or pass command to the device driver 479 * o execute requested action or pass command to the device driver
@@ -714,10 +493,6 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
714 args.device1[23] = 0; 493 args.device1[23] = 0;
715 args.u.device2[23] = 0; 494 args.u.device2[23] = 0;
716 495
717#ifdef VLAN_DEBUG
718 printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
719#endif
720
721 rtnl_lock(); 496 rtnl_lock();
722 497
723 switch (args.cmd) { 498 switch (args.cmd) {
@@ -747,6 +522,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
747 vlan_dev_set_ingress_priority(dev, 522 vlan_dev_set_ingress_priority(dev,
748 args.u.skb_priority, 523 args.u.skb_priority,
749 args.vlan_qos); 524 args.vlan_qos);
525 err = 0;
750 break; 526 break;
751 527
752 case SET_VLAN_EGRESS_PRIORITY_CMD: 528 case SET_VLAN_EGRESS_PRIORITY_CMD:
@@ -770,7 +546,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
770 case SET_VLAN_NAME_TYPE_CMD: 546 case SET_VLAN_NAME_TYPE_CMD:
771 err = -EPERM; 547 err = -EPERM;
772 if (!capable(CAP_NET_ADMIN)) 548 if (!capable(CAP_NET_ADMIN))
773 return -EPERM; 549 break;
774 if ((args.u.name_type >= 0) && 550 if ((args.u.name_type >= 0) &&
775 (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { 551 (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
776 vlan_name_type = args.u.name_type; 552 vlan_name_type = args.u.name_type;
@@ -791,36 +567,16 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
791 err = -EPERM; 567 err = -EPERM;
792 if (!capable(CAP_NET_ADMIN)) 568 if (!capable(CAP_NET_ADMIN))
793 break; 569 break;
794 err = unregister_vlan_device(dev); 570 unregister_vlan_dev(dev);
571 err = 0;
795 break; 572 break;
796 573
797 case GET_VLAN_INGRESS_PRIORITY_CMD:
798 /* TODO: Implement
799 err = vlan_dev_get_ingress_priority(args);
800 if (copy_to_user((void*)arg, &args,
801 sizeof(struct vlan_ioctl_args))) {
802 err = -EFAULT;
803 }
804 */
805 err = -EINVAL;
806 break;
807 case GET_VLAN_EGRESS_PRIORITY_CMD:
808 /* TODO: Implement
809 err = vlan_dev_get_egress_priority(args.device1, &(args.args);
810 if (copy_to_user((void*)arg, &args,
811 sizeof(struct vlan_ioctl_args))) {
812 err = -EFAULT;
813 }
814 */
815 err = -EINVAL;
816 break;
817 case GET_VLAN_REALDEV_NAME_CMD: 574 case GET_VLAN_REALDEV_NAME_CMD:
818 err = 0; 575 err = 0;
819 vlan_dev_get_realdev_name(dev, args.u.device2); 576 vlan_dev_get_realdev_name(dev, args.u.device2);
820 if (copy_to_user(arg, &args, 577 if (copy_to_user(arg, &args,
821 sizeof(struct vlan_ioctl_args))) { 578 sizeof(struct vlan_ioctl_args)))
822 err = -EFAULT; 579 err = -EFAULT;
823 }
824 break; 580 break;
825 581
826 case GET_VLAN_VID_CMD: 582 case GET_VLAN_VID_CMD:
@@ -828,16 +584,12 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
828 vlan_dev_get_vid(dev, &vid); 584 vlan_dev_get_vid(dev, &vid);
829 args.u.VID = vid; 585 args.u.VID = vid;
830 if (copy_to_user(arg, &args, 586 if (copy_to_user(arg, &args,
831 sizeof(struct vlan_ioctl_args))) { 587 sizeof(struct vlan_ioctl_args)))
832 err = -EFAULT; 588 err = -EFAULT;
833 }
834 break; 589 break;
835 590
836 default: 591 default:
837 /* pass on to underlying device instead?? */ 592 err = -EOPNOTSUPP;
838 printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
839 __FUNCTION__, args.cmd);
840 err = -EINVAL;
841 break; 593 break;
842 } 594 }
843out: 595out:
@@ -845,5 +597,59 @@ out:
845 return err; 597 return err;
846} 598}
847 599
600static int __init vlan_proto_init(void)
601{
602 int err;
603
604 pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
605 pr_info("All bugs added by %s\n", vlan_buggyright);
606
607 err = vlan_proc_init();
608 if (err < 0)
609 goto err1;
610
611 err = register_netdevice_notifier(&vlan_notifier_block);
612 if (err < 0)
613 goto err2;
614
615 err = vlan_netlink_init();
616 if (err < 0)
617 goto err3;
618
619 dev_add_pack(&vlan_packet_type);
620 vlan_ioctl_set(vlan_ioctl_handler);
621 return 0;
622
623err3:
624 unregister_netdevice_notifier(&vlan_notifier_block);
625err2:
626 vlan_proc_cleanup();
627err1:
628 return err;
629}
630
631static void __exit vlan_cleanup_module(void)
632{
633 unsigned int i;
634
635 vlan_ioctl_set(NULL);
636 vlan_netlink_fini();
637
638 unregister_netdevice_notifier(&vlan_notifier_block);
639
640 dev_remove_pack(&vlan_packet_type);
641
642 /* This table must be empty if there are no module references left. */
643 for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
644 BUG_ON(!hlist_empty(&vlan_group_hash[i]));
645
646 vlan_proc_cleanup();
647
648 synchronize_net();
649}
650
651module_init(vlan_proto_init);
652module_exit(vlan_cleanup_module);
653
848MODULE_LICENSE("GPL"); 654MODULE_LICENSE("GPL");
849MODULE_VERSION(DRV_VERSION); 655MODULE_VERSION(DRV_VERSION);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index cf4a80d06b35..73efcc715ccb 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,31 +3,6 @@
3 3
4#include <linux/if_vlan.h> 4#include <linux/if_vlan.h>
5 5
6/* Uncomment this if you want debug traces to be shown. */
7/* #define VLAN_DEBUG */
8
9#define VLAN_ERR KERN_ERR
10#define VLAN_INF KERN_INFO
11#define VLAN_DBG KERN_ALERT /* change these... to debug, having a hard time
12 * changing the log level at run-time..for some reason.
13 */
14
15/*
16
17These I use for memory debugging. I feared a leak at one time, but
18I never found it..and the problem seems to have dissappeared. Still,
19I'll bet they might prove useful again... --Ben
20
21
22#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG "%s: " x, __FUNCTION__, y, z);
23#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG "%s: " x, __FUNCTION__, y);
24*/
25
26/* This way they don't do anything! */
27#define VLAN_MEM_DBG(x, y, z)
28#define VLAN_FMEM_DBG(x, y)
29
30
31extern unsigned short vlan_name_type; 6extern unsigned short vlan_name_type;
32 7
33#define VLAN_GRP_HASH_SHIFT 5 8#define VLAN_GRP_HASH_SHIFT 5
@@ -45,22 +20,12 @@ extern unsigned short vlan_name_type;
45 * Must be invoked with rcu_read_lock (ie preempt disabled) 20 * Must be invoked with rcu_read_lock (ie preempt disabled)
46 * or with RTNL. 21 * or with RTNL.
47 */ 22 */
48struct net_device *__find_vlan_dev(struct net_device* real_dev, 23struct net_device *__find_vlan_dev(struct net_device *real_dev,
49 unsigned short VID); /* vlan.c */ 24 unsigned short VID); /* vlan.c */
50 25
51/* found in vlan_dev.c */ 26/* found in vlan_dev.c */
52int vlan_dev_rebuild_header(struct sk_buff *skb);
53int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, 27int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
54 struct packet_type *ptype, struct net_device *orig_dev); 28 struct packet_type *ptype, struct net_device *orig_dev);
55int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
56 unsigned short type, const void *daddr,
57 const void *saddr, unsigned len);
58int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
59int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
60int vlan_dev_change_mtu(struct net_device *dev, int new_mtu);
61int vlan_dev_open(struct net_device* dev);
62int vlan_dev_stop(struct net_device* dev);
63int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
64void vlan_dev_set_ingress_priority(const struct net_device *dev, 29void vlan_dev_set_ingress_priority(const struct net_device *dev,
65 u32 skb_prio, short vlan_prio); 30 u32 skb_prio, short vlan_prio);
66int vlan_dev_set_egress_priority(const struct net_device *dev, 31int vlan_dev_set_egress_priority(const struct net_device *dev,
@@ -69,13 +34,11 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
69 u32 flag, short flag_val); 34 u32 flag, short flag_val);
70void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); 35void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
71void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); 36void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
72void vlan_change_rx_flags(struct net_device *dev, int change);
73void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
74 37
75int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); 38int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
76void vlan_setup(struct net_device *dev); 39void vlan_setup(struct net_device *dev);
77int register_vlan_dev(struct net_device *dev); 40int register_vlan_dev(struct net_device *dev);
78int unregister_vlan_device(struct net_device *dev); 41void unregister_vlan_dev(struct net_device *dev);
79 42
80int vlan_netlink_init(void); 43int vlan_netlink_init(void);
81void vlan_netlink_fini(void); 44void vlan_netlink_fini(void);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1a1740aa9a8b..77f04e49a1a0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -3,7 +3,7 @@
3 * Ethernet-type device handling. 3 * Ethernet-type device handling.
4 * 4 *
5 * Authors: Ben Greear <greearb@candelatech.com> 5 * Authors: Ben Greear <greearb@candelatech.com>
6 * Please send support related email to: vlan@scry.wanfear.com 6 * Please send support related email to: netdev@vger.kernel.org
7 * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html 7 * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
8 * 8 *
9 * Fixes: Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com> 9 * Fixes: Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com>
@@ -47,7 +47,7 @@
47 * 47 *
48 * TODO: This needs a checkup, I'm ignorant here. --BLG 48 * TODO: This needs a checkup, I'm ignorant here. --BLG
49 */ 49 */
50int vlan_dev_rebuild_header(struct sk_buff *skb) 50static int vlan_dev_rebuild_header(struct sk_buff *skb)
51{ 51{
52 struct net_device *dev = skb->dev; 52 struct net_device *dev = skb->dev;
53 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); 53 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -60,9 +60,8 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
60 return arp_find(veth->h_dest, skb); 60 return arp_find(veth->h_dest, skb);
61#endif 61#endif
62 default: 62 default:
63 printk(VLAN_DBG 63 pr_debug("%s: unable to resolve type %X addresses.\n",
64 "%s: unable to resolve type %X addresses.\n", 64 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
65 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
66 65
67 memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); 66 memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
68 break; 67 break;
@@ -73,7 +72,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
73 72
74static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) 73static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
75{ 74{
76 if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { 75 if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
77 if (skb_shared(skb) || skb_cloned(skb)) { 76 if (skb_shared(skb) || skb_cloned(skb)) {
78 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 77 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
79 kfree_skb(skb); 78 kfree_skb(skb);
@@ -90,6 +89,40 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
90 return skb; 89 return skb;
91} 90}
92 91
92static inline void vlan_set_encap_proto(struct sk_buff *skb,
93 struct vlan_hdr *vhdr)
94{
95 __be16 proto;
96 unsigned char *rawp;
97
98 /*
99 * Was a VLAN packet, grab the encapsulated protocol, which the layer
100 * three protocols care about.
101 */
102
103 proto = vhdr->h_vlan_encapsulated_proto;
104 if (ntohs(proto) >= 1536) {
105 skb->protocol = proto;
106 return;
107 }
108
109 rawp = skb->data;
110 if (*(unsigned short *)rawp == 0xFFFF)
111 /*
112 * This is a magic hack to spot IPX packets. Older Novell
113 * breaks the protocol design and runs IPX over 802.3 without
114 * an 802.2 LLC layer. We look for FFFF which isn't a used
115 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
116 * but does for the rest.
117 */
118 skb->protocol = htons(ETH_P_802_3);
119 else
120 /*
121 * Real 802.2 LLC
122 */
123 skb->protocol = htons(ETH_P_802_2);
124}
125
93/* 126/*
94 * Determine the packet's protocol ID. The rule here is that we 127 * Determine the packet's protocol ID. The rule here is that we
95 * assume 802.3 if the type field is short enough to be a length. 128 * assume 802.3 if the type field is short enough to be a length.
@@ -107,115 +140,58 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
107 * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be 140 * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
108 * stored UNALIGNED in the memory. RISC systems don't like 141 * stored UNALIGNED in the memory. RISC systems don't like
109 * such cases very much... 142 * such cases very much...
110 * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be aligned, 143 * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
111 * so there doesn't need to be any of the unaligned stuff. It has 144 * aligned, so there doesn't need to be any of the unaligned
112 * been commented out now... --Ben 145 * stuff. It has been commented out now... --Ben
113 * 146 *
114 */ 147 */
115int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, 148int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
116 struct packet_type* ptype, struct net_device *orig_dev) 149 struct packet_type *ptype, struct net_device *orig_dev)
117{ 150{
118 unsigned char *rawp = NULL;
119 struct vlan_hdr *vhdr; 151 struct vlan_hdr *vhdr;
120 unsigned short vid; 152 unsigned short vid;
121 struct net_device_stats *stats; 153 struct net_device_stats *stats;
122 unsigned short vlan_TCI; 154 unsigned short vlan_TCI;
123 __be16 proto;
124
125 if (dev->nd_net != &init_net) {
126 kfree_skb(skb);
127 return -1;
128 }
129 155
130 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 156 if (dev->nd_net != &init_net)
131 return -1; 157 goto err_free;
132 158
133 if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) { 159 skb = skb_share_check(skb, GFP_ATOMIC);
134 kfree_skb(skb); 160 if (skb == NULL)
135 return -1; 161 goto err_free;
136 }
137 162
138 vhdr = (struct vlan_hdr *)(skb->data); 163 if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
164 goto err_free;
139 165
140 /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ 166 vhdr = (struct vlan_hdr *)skb->data;
141 vlan_TCI = ntohs(vhdr->h_vlan_TCI); 167 vlan_TCI = ntohs(vhdr->h_vlan_TCI);
142
143 vid = (vlan_TCI & VLAN_VID_MASK); 168 vid = (vlan_TCI & VLAN_VID_MASK);
144 169
145#ifdef VLAN_DEBUG
146 printk(VLAN_DBG "%s: skb: %p vlan_id: %hx\n",
147 __FUNCTION__, skb, vid);
148#endif
149
150 /* Ok, we will find the correct VLAN device, strip the header,
151 * and then go on as usual.
152 */
153
154 /* We have 12 bits of vlan ID.
155 *
156 * We must not drop allow preempt until we hold a
157 * reference to the device (netif_rx does that) or we
158 * fail.
159 */
160
161 rcu_read_lock(); 170 rcu_read_lock();
162 skb->dev = __find_vlan_dev(dev, vid); 171 skb->dev = __find_vlan_dev(dev, vid);
163 if (!skb->dev) { 172 if (!skb->dev) {
164 rcu_read_unlock(); 173 pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
165 174 __FUNCTION__, (unsigned int)vid, dev->name);
166#ifdef VLAN_DEBUG 175 goto err_unlock;
167 printk(VLAN_DBG "%s: ERROR: No net_device for VID: %i on dev: %s [%i]\n",
168 __FUNCTION__, (unsigned int)(vid), dev->name, dev->ifindex);
169#endif
170 kfree_skb(skb);
171 return -1;
172 } 176 }
173 177
174 skb->dev->last_rx = jiffies; 178 skb->dev->last_rx = jiffies;
175 179
176 /* Bump the rx counters for the VLAN device. */ 180 stats = &skb->dev->stats;
177 stats = vlan_dev_get_stats(skb->dev);
178 stats->rx_packets++; 181 stats->rx_packets++;
179 stats->rx_bytes += skb->len; 182 stats->rx_bytes += skb->len;
180 183
181 /* Take off the VLAN header (4 bytes currently) */
182 skb_pull_rcsum(skb, VLAN_HLEN); 184 skb_pull_rcsum(skb, VLAN_HLEN);
183 185
184 /* Ok, lets check to make sure the device (dev) we 186 skb->priority = vlan_get_ingress_priority(skb->dev,
185 * came in on is what this VLAN is attached to. 187 ntohs(vhdr->h_vlan_TCI));
186 */
187
188 if (dev != VLAN_DEV_INFO(skb->dev)->real_dev) {
189 rcu_read_unlock();
190 188
191#ifdef VLAN_DEBUG 189 pr_debug("%s: priority: %u for TCI: %hu\n",
192 printk(VLAN_DBG "%s: dropping skb: %p because came in on wrong device, dev: %s real_dev: %s, skb_dev: %s\n", 190 __FUNCTION__, skb->priority, ntohs(vhdr->h_vlan_TCI));
193 __FUNCTION__, skb, dev->name,
194 VLAN_DEV_INFO(skb->dev)->real_dev->name,
195 skb->dev->name);
196#endif
197 kfree_skb(skb);
198 stats->rx_errors++;
199 return -1;
200 }
201 191
202 /*
203 * Deal with ingress priority mapping.
204 */
205 skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI));
206
207#ifdef VLAN_DEBUG
208 printk(VLAN_DBG "%s: priority: %lu for TCI: %hu (hbo)\n",
209 __FUNCTION__, (unsigned long)(skb->priority),
210 ntohs(vhdr->h_vlan_TCI));
211#endif
212
213 /* The ethernet driver already did the pkt_type calculations
214 * for us...
215 */
216 switch (skb->pkt_type) { 192 switch (skb->pkt_type) {
217 case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ 193 case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
218 // stats->broadcast ++; // no such counter :-( 194 /* stats->broadcast ++; // no such counter :-( */
219 break; 195 break;
220 196
221 case PACKET_MULTICAST: 197 case PACKET_MULTICAST:
@@ -224,109 +200,47 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
224 200
225 case PACKET_OTHERHOST: 201 case PACKET_OTHERHOST:
226 /* Our lower layer thinks this is not local, let's make sure. 202 /* Our lower layer thinks this is not local, let's make sure.
227 * This allows the VLAN to have a different MAC than the underlying 203 * This allows the VLAN to have a different MAC than the
228 * device, and still route correctly. 204 * underlying device, and still route correctly.
229 */ 205 */
230 if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) { 206 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
231 /* It is for our (changed) MAC-address! */ 207 skb->dev->dev_addr))
232 skb->pkt_type = PACKET_HOST; 208 skb->pkt_type = PACKET_HOST;
233 }
234 break; 209 break;
235 default: 210 default:
236 break; 211 break;
237 } 212 }
238 213
239 /* Was a VLAN packet, grab the encapsulated protocol, which the layer 214 vlan_set_encap_proto(skb, vhdr);
240 * three protocols care about.
241 */
242 /* proto = get_unaligned(&vhdr->h_vlan_encapsulated_proto); */
243 proto = vhdr->h_vlan_encapsulated_proto;
244
245 skb->protocol = proto;
246 if (ntohs(proto) >= 1536) {
247 /* place it back on the queue to be handled by
248 * true layer 3 protocols.
249 */
250
251 /* See if we are configured to re-write the VLAN header
252 * to make it look like ethernet...
253 */
254 skb = vlan_check_reorder_header(skb);
255
256 /* Can be null if skb-clone fails when re-ordering */
257 if (skb) {
258 netif_rx(skb);
259 } else {
260 /* TODO: Add a more specific counter here. */
261 stats->rx_errors++;
262 }
263 rcu_read_unlock();
264 return 0;
265 }
266
267 rawp = skb->data;
268
269 /*
270 * This is a magic hack to spot IPX packets. Older Novell breaks
271 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
272 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
273 * won't work for fault tolerant netware but does for the rest.
274 */
275 if (*(unsigned short *)rawp == 0xFFFF) {
276 skb->protocol = htons(ETH_P_802_3);
277 /* place it back on the queue to be handled by true layer 3 protocols.
278 */
279
280 /* See if we are configured to re-write the VLAN header
281 * to make it look like ethernet...
282 */
283 skb = vlan_check_reorder_header(skb);
284
285 /* Can be null if skb-clone fails when re-ordering */
286 if (skb) {
287 netif_rx(skb);
288 } else {
289 /* TODO: Add a more specific counter here. */
290 stats->rx_errors++;
291 }
292 rcu_read_unlock();
293 return 0;
294 }
295
296 /*
297 * Real 802.2 LLC
298 */
299 skb->protocol = htons(ETH_P_802_2);
300 /* place it back on the queue to be handled by upper layer protocols.
301 */
302 215
303 /* See if we are configured to re-write the VLAN header
304 * to make it look like ethernet...
305 */
306 skb = vlan_check_reorder_header(skb); 216 skb = vlan_check_reorder_header(skb);
307 217 if (!skb) {
308 /* Can be null if skb-clone fails when re-ordering */
309 if (skb) {
310 netif_rx(skb);
311 } else {
312 /* TODO: Add a more specific counter here. */
313 stats->rx_errors++; 218 stats->rx_errors++;
219 goto err_unlock;
314 } 220 }
221
222 netif_rx(skb);
315 rcu_read_unlock(); 223 rcu_read_unlock();
316 return 0; 224 return NET_RX_SUCCESS;
225
226err_unlock:
227 rcu_read_unlock();
228err_free:
229 kfree_skb(skb);
230 return NET_RX_DROP;
317} 231}
318 232
319static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev, 233static inline unsigned short
320 struct sk_buff* skb) 234vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
321{ 235{
322 struct vlan_priority_tci_mapping *mp = 236 struct vlan_priority_tci_mapping *mp;
323 VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)];
324 237
238 mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
325 while (mp) { 239 while (mp) {
326 if (mp->priority == skb->priority) { 240 if (mp->priority == skb->priority) {
327 return mp->vlan_qos; /* This should already be shifted to mask 241 return mp->vlan_qos; /* This should already be shifted
328 * correctly with the VLAN's TCI 242 * to mask correctly with the
329 */ 243 * VLAN's TCI */
330 } 244 }
331 mp = mp->next; 245 mp = mp->next;
332 } 246 }
@@ -342,20 +256,20 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev
342 * This is called when the SKB is moving down the stack towards the 256 * This is called when the SKB is moving down the stack towards the
343 * physical devices. 257 * physical devices.
344 */ 258 */
345int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, 259static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
346 unsigned short type, 260 unsigned short type,
347 const void *daddr, const void *saddr, unsigned len) 261 const void *daddr, const void *saddr,
262 unsigned int len)
348{ 263{
349 struct vlan_hdr *vhdr; 264 struct vlan_hdr *vhdr;
350 unsigned short veth_TCI = 0; 265 unsigned short veth_TCI = 0;
351 int rc = 0; 266 int rc = 0;
352 int build_vlan_header = 0; 267 int build_vlan_header = 0;
353 struct net_device *vdev = dev; /* save this for the bottom of the method */ 268 struct net_device *vdev = dev;
354 269
355#ifdef VLAN_DEBUG 270 pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
356 printk(VLAN_DBG "%s: skb: %p type: %hx len: %x vlan_id: %hx, daddr: %p\n", 271 __FUNCTION__, skb, type, len, vlan_dev_info(dev)->vlan_id,
357 __FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr); 272 daddr);
358#endif
359 273
360 /* build vlan header only if re_order_header flag is NOT set. This 274 /* build vlan header only if re_order_header flag is NOT set. This
361 * fixes some programs that get confused when they see a VLAN device 275 * fixes some programs that get confused when they see a VLAN device
@@ -365,7 +279,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
365 * header shuffling in the hard_start_xmit. Users can turn off this 279 * header shuffling in the hard_start_xmit. Users can turn off this
366 * REORDER behaviour with the vconfig tool. 280 * REORDER behaviour with the vconfig tool.
367 */ 281 */
368 if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) 282 if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR))
369 build_vlan_header = 1; 283 build_vlan_header = 1;
370 284
371 if (build_vlan_header) { 285 if (build_vlan_header) {
@@ -373,29 +287,28 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
373 287
374 /* build the four bytes that make this a VLAN header. */ 288 /* build the four bytes that make this a VLAN header. */
375 289
376 /* Now, construct the second two bytes. This field looks something 290 /* Now, construct the second two bytes. This field looks
377 * like: 291 * something like:
378 * usr_priority: 3 bits (high bits) 292 * usr_priority: 3 bits (high bits)
379 * CFI 1 bit 293 * CFI 1 bit
380 * VLAN ID 12 bits (low bits) 294 * VLAN ID 12 bits (low bits)
381 * 295 *
382 */ 296 */
383 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 297 veth_TCI = vlan_dev_info(dev)->vlan_id;
384 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 298 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
385 299
386 vhdr->h_vlan_TCI = htons(veth_TCI); 300 vhdr->h_vlan_TCI = htons(veth_TCI);
387 301
388 /* 302 /*
389 * Set the protocol type. 303 * Set the protocol type. For a packet of type ETH_P_802_3 we
390 * For a packet of type ETH_P_802_3 we put the length in here instead. 304 * put the length in here instead. It is up to the 802.2
391 * It is up to the 802.2 layer to carry protocol information. 305 * layer to carry protocol information.
392 */ 306 */
393 307
394 if (type != ETH_P_802_3) { 308 if (type != ETH_P_802_3)
395 vhdr->h_vlan_encapsulated_proto = htons(type); 309 vhdr->h_vlan_encapsulated_proto = htons(type);
396 } else { 310 else
397 vhdr->h_vlan_encapsulated_proto = htons(len); 311 vhdr->h_vlan_encapsulated_proto = htons(len);
398 }
399 312
400 skb->protocol = htons(ETH_P_8021Q); 313 skb->protocol = htons(ETH_P_8021Q);
401 skb_reset_network_header(skb); 314 skb_reset_network_header(skb);
@@ -405,16 +318,16 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
405 if (saddr == NULL) 318 if (saddr == NULL)
406 saddr = dev->dev_addr; 319 saddr = dev->dev_addr;
407 320
408 dev = VLAN_DEV_INFO(dev)->real_dev; 321 dev = vlan_dev_info(dev)->real_dev;
409 322
410 /* MPLS can send us skbuffs w/out enough space. This check will grow the 323 /* MPLS can send us skbuffs w/out enough space. This check will grow
411 * skb if it doesn't have enough headroom. Not a beautiful solution, so 324 * the skb if it doesn't have enough headroom. Not a beautiful solution,
412 * I'll tick a counter so that users can know it's happening... If they 325 * so I'll tick a counter so that users can know it's happening...
413 * care... 326 * If they care...
414 */ 327 */
415 328
416 /* NOTE: This may still break if the underlying device is not the final 329 /* NOTE: This may still break if the underlying device is not the final
417 * device (and thus there are more headers to add...) It should work for 330 * device (and thus there are more headers to add...) It should work for
418 * good-ole-ethernet though. 331 * good-ole-ethernet though.
419 */ 332 */
420 if (skb_headroom(skb) < dev->hard_header_len) { 333 if (skb_headroom(skb) < dev->hard_header_len) {
@@ -422,14 +335,12 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
422 skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len); 335 skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
423 kfree_skb(sk_tmp); 336 kfree_skb(sk_tmp);
424 if (skb == NULL) { 337 if (skb == NULL) {
425 struct net_device_stats *stats = vlan_dev_get_stats(vdev); 338 struct net_device_stats *stats = &vdev->stats;
426 stats->tx_dropped++; 339 stats->tx_dropped++;
427 return -ENOMEM; 340 return -ENOMEM;
428 } 341 }
429 VLAN_DEV_INFO(vdev)->cnt_inc_headroom_on_tx++; 342 vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
430#ifdef VLAN_DEBUG 343 pr_debug("%s: %s: had to grow skb\n", __FUNCTION__, vdev->name);
431 printk(VLAN_DBG "%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
432#endif
433 } 344 }
434 345
435 if (build_vlan_header) { 346 if (build_vlan_header) {
@@ -441,19 +352,19 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
441 else if (rc < 0) 352 else if (rc < 0)
442 rc -= VLAN_HLEN; 353 rc -= VLAN_HLEN;
443 } else 354 } else
444 /* If here, then we'll just make a normal looking ethernet frame, 355 /* If here, then we'll just make a normal looking ethernet
445 * but, the hard_start_xmit method will insert the tag (it has to 356 * frame, but, the hard_start_xmit method will insert the tag
446 * be able to do this for bridged and other skbs that don't come 357 * (it has to be able to do this for bridged and other skbs
447 * down the protocol stack in an orderly manner. 358 * that don't come down the protocol stack in an orderly manner.
448 */ 359 */
449 rc = dev_hard_header(skb, dev, type, daddr, saddr, len); 360 rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
450 361
451 return rc; 362 return rc;
452} 363}
453 364
454int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 365static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
455{ 366{
456 struct net_device_stats *stats = vlan_dev_get_stats(dev); 367 struct net_device_stats *stats = &dev->stats;
457 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); 368 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
458 369
459 /* Handle non-VLAN frames if they are sent to us, for example by DHCP. 370 /* Handle non-VLAN frames if they are sent to us, for example by DHCP.
@@ -462,24 +373,23 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
462 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... 373 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
463 */ 374 */
464 375
465 if (veth->h_vlan_proto != htons(ETH_P_8021Q)) { 376 if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
377 vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
466 int orig_headroom = skb_headroom(skb); 378 int orig_headroom = skb_headroom(skb);
467 unsigned short veth_TCI; 379 unsigned short veth_TCI;
468 380
469 /* This is not a VLAN frame...but we can fix that! */ 381 /* This is not a VLAN frame...but we can fix that! */
470 VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++; 382 vlan_dev_info(dev)->cnt_encap_on_xmit++;
471 383
472#ifdef VLAN_DEBUG 384 pr_debug("%s: proto to encap: 0x%hx\n",
473 printk(VLAN_DBG "%s: proto to encap: 0x%hx (hbo)\n", 385 __FUNCTION__, htons(veth->h_vlan_proto));
474 __FUNCTION__, htons(veth->h_vlan_proto));
475#endif
476 /* Construct the second two bytes. This field looks something 386 /* Construct the second two bytes. This field looks something
477 * like: 387 * like:
478 * usr_priority: 3 bits (high bits) 388 * usr_priority: 3 bits (high bits)
479 * CFI 1 bit 389 * CFI 1 bit
480 * VLAN ID 12 bits (low bits) 390 * VLAN ID 12 bits (low bits)
481 */ 391 */
482 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 392 veth_TCI = vlan_dev_info(dev)->vlan_id;
483 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 393 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
484 394
485 skb = __vlan_put_tag(skb, veth_TCI); 395 skb = __vlan_put_tag(skb, veth_TCI);
@@ -488,32 +398,33 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
488 return 0; 398 return 0;
489 } 399 }
490 400
491 if (orig_headroom < VLAN_HLEN) { 401 if (orig_headroom < VLAN_HLEN)
492 VLAN_DEV_INFO(dev)->cnt_inc_headroom_on_tx++; 402 vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
493 }
494 } 403 }
495 404
496#ifdef VLAN_DEBUG 405 pr_debug("%s: about to send skb: %p to dev: %s\n",
497 printk(VLAN_DBG "%s: about to send skb: %p to dev: %s\n",
498 __FUNCTION__, skb, skb->dev->name); 406 __FUNCTION__, skb, skb->dev->name);
499 printk(VLAN_DBG " %2hx.%2hx.%2hx.%2xh.%2hx.%2hx %2hx.%2hx.%2hx.%2hx.%2hx.%2hx %4hx %4hx %4hx\n", 407 pr_debug(" " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n",
500 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], veth->h_dest[3], veth->h_dest[4], veth->h_dest[5], 408 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2],
501 veth->h_source[0], veth->h_source[1], veth->h_source[2], veth->h_source[3], veth->h_source[4], veth->h_source[5], 409 veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
502 veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); 410 veth->h_source[0], veth->h_source[1], veth->h_source[2],
503#endif 411 veth->h_source[3], veth->h_source[4], veth->h_source[5],
412 veth->h_vlan_proto, veth->h_vlan_TCI,
413 veth->h_vlan_encapsulated_proto);
504 414
505 stats->tx_packets++; /* for statics only */ 415 stats->tx_packets++; /* for statics only */
506 stats->tx_bytes += skb->len; 416 stats->tx_bytes += skb->len;
507 417
508 skb->dev = VLAN_DEV_INFO(dev)->real_dev; 418 skb->dev = vlan_dev_info(dev)->real_dev;
509 dev_queue_xmit(skb); 419 dev_queue_xmit(skb);
510 420
511 return 0; 421 return 0;
512} 422}
513 423
514int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 424static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
425 struct net_device *dev)
515{ 426{
516 struct net_device_stats *stats = vlan_dev_get_stats(dev); 427 struct net_device_stats *stats = &dev->stats;
517 unsigned short veth_TCI; 428 unsigned short veth_TCI;
518 429
519 /* Construct the second two bytes. This field looks something 430 /* Construct the second two bytes. This field looks something
@@ -522,25 +433,25 @@ int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev
522 * CFI 1 bit 433 * CFI 1 bit
523 * VLAN ID 12 bits (low bits) 434 * VLAN ID 12 bits (low bits)
524 */ 435 */
525 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 436 veth_TCI = vlan_dev_info(dev)->vlan_id;
526 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 437 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
527 skb = __vlan_hwaccel_put_tag(skb, veth_TCI); 438 skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
528 439
529 stats->tx_packets++; 440 stats->tx_packets++;
530 stats->tx_bytes += skb->len; 441 stats->tx_bytes += skb->len;
531 442
532 skb->dev = VLAN_DEV_INFO(dev)->real_dev; 443 skb->dev = vlan_dev_info(dev)->real_dev;
533 dev_queue_xmit(skb); 444 dev_queue_xmit(skb);
534 445
535 return 0; 446 return 0;
536} 447}
537 448
538int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) 449static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
539{ 450{
540 /* TODO: gotta make sure the underlying layer can handle it, 451 /* TODO: gotta make sure the underlying layer can handle it,
541 * maybe an IFF_VLAN_CAPABLE flag for devices? 452 * maybe an IFF_VLAN_CAPABLE flag for devices?
542 */ 453 */
543 if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu) 454 if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
544 return -ERANGE; 455 return -ERANGE;
545 456
546 dev->mtu = new_mtu; 457 dev->mtu = new_mtu;
@@ -551,7 +462,7 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
551void vlan_dev_set_ingress_priority(const struct net_device *dev, 462void vlan_dev_set_ingress_priority(const struct net_device *dev,
552 u32 skb_prio, short vlan_prio) 463 u32 skb_prio, short vlan_prio)
553{ 464{
554 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 465 struct vlan_dev_info *vlan = vlan_dev_info(dev);
555 466
556 if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) 467 if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
557 vlan->nr_ingress_mappings--; 468 vlan->nr_ingress_mappings--;
@@ -564,7 +475,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
564int vlan_dev_set_egress_priority(const struct net_device *dev, 475int vlan_dev_set_egress_priority(const struct net_device *dev,
565 u32 skb_prio, short vlan_prio) 476 u32 skb_prio, short vlan_prio)
566{ 477{
567 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 478 struct vlan_dev_info *vlan = vlan_dev_info(dev);
568 struct vlan_priority_tci_mapping *mp = NULL; 479 struct vlan_priority_tci_mapping *mp = NULL;
569 struct vlan_priority_tci_mapping *np; 480 struct vlan_priority_tci_mapping *np;
570 u32 vlan_qos = (vlan_prio << 13) & 0xE000; 481 u32 vlan_qos = (vlan_prio << 13) & 0xE000;
@@ -604,30 +515,28 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
604{ 515{
605 /* verify flag is supported */ 516 /* verify flag is supported */
606 if (flag == VLAN_FLAG_REORDER_HDR) { 517 if (flag == VLAN_FLAG_REORDER_HDR) {
607 if (flag_val) { 518 if (flag_val)
608 VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; 519 vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
609 } else { 520 else
610 VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; 521 vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
611 }
612 return 0; 522 return 0;
613 } 523 }
614 printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
615 return -EINVAL; 524 return -EINVAL;
616} 525}
617 526
618void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) 527void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
619{ 528{
620 strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); 529 strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
621} 530}
622 531
623void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) 532void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
624{ 533{
625 *result = VLAN_DEV_INFO(dev)->vlan_id; 534 *result = vlan_dev_info(dev)->vlan_id;
626} 535}
627 536
628int vlan_dev_open(struct net_device *dev) 537static int vlan_dev_open(struct net_device *dev)
629{ 538{
630 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 539 struct vlan_dev_info *vlan = vlan_dev_info(dev);
631 struct net_device *real_dev = vlan->real_dev; 540 struct net_device *real_dev = vlan->real_dev;
632 int err; 541 int err;
633 542
@@ -649,11 +558,12 @@ int vlan_dev_open(struct net_device *dev)
649 return 0; 558 return 0;
650} 559}
651 560
652int vlan_dev_stop(struct net_device *dev) 561static int vlan_dev_stop(struct net_device *dev)
653{ 562{
654 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 563 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
655 564
656 dev_mc_unsync(real_dev, dev); 565 dev_mc_unsync(real_dev, dev);
566 dev_unicast_unsync(real_dev, dev);
657 if (dev->flags & IFF_ALLMULTI) 567 if (dev->flags & IFF_ALLMULTI)
658 dev_set_allmulti(real_dev, -1); 568 dev_set_allmulti(real_dev, -1);
659 if (dev->flags & IFF_PROMISC) 569 if (dev->flags & IFF_PROMISC)
@@ -665,16 +575,42 @@ int vlan_dev_stop(struct net_device *dev)
665 return 0; 575 return 0;
666} 576}
667 577
668int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 578static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
579{
580 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
581 struct sockaddr *addr = p;
582 int err;
583
584 if (!is_valid_ether_addr(addr->sa_data))
585 return -EADDRNOTAVAIL;
586
587 if (!(dev->flags & IFF_UP))
588 goto out;
589
590 if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
591 err = dev_unicast_add(real_dev, addr->sa_data, ETH_ALEN);
592 if (err < 0)
593 return err;
594 }
595
596 if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
597 dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
598
599out:
600 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
601 return 0;
602}
603
604static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
669{ 605{
670 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 606 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
671 struct ifreq ifrr; 607 struct ifreq ifrr;
672 int err = -EOPNOTSUPP; 608 int err = -EOPNOTSUPP;
673 609
674 strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ); 610 strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
675 ifrr.ifr_ifru = ifr->ifr_ifru; 611 ifrr.ifr_ifru = ifr->ifr_ifru;
676 612
677 switch(cmd) { 613 switch (cmd) {
678 case SIOCGMIIPHY: 614 case SIOCGMIIPHY:
679 case SIOCGMIIREG: 615 case SIOCGMIIREG:
680 case SIOCSMIIREG: 616 case SIOCSMIIREG:
@@ -689,9 +625,9 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
689 return err; 625 return err;
690} 626}
691 627
692void vlan_change_rx_flags(struct net_device *dev, int change) 628static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
693{ 629{
694 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 630 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
695 631
696 if (change & IFF_ALLMULTI) 632 if (change & IFF_ALLMULTI)
697 dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); 633 dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -699,8 +635,80 @@ void vlan_change_rx_flags(struct net_device *dev, int change)
699 dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); 635 dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
700} 636}
701 637
702/** Taken from Gleb + Lennert's VLAN code, and modified... */ 638static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
703void vlan_dev_set_multicast_list(struct net_device *vlan_dev) 639{
640 dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
641 dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
642}
643
644/*
645 * vlan network devices have devices nesting below it, and are a special
646 * "super class" of normal network devices; split their locks off into a
647 * separate class since they always nest.
648 */
649static struct lock_class_key vlan_netdev_xmit_lock_key;
650
651static const struct header_ops vlan_header_ops = {
652 .create = vlan_dev_hard_header,
653 .rebuild = vlan_dev_rebuild_header,
654 .parse = eth_header_parse,
655};
656
657static int vlan_dev_init(struct net_device *dev)
658{
659 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
660 int subclass = 0;
661
662 /* IFF_BROADCAST|IFF_MULTICAST; ??? */
663 dev->flags = real_dev->flags & ~IFF_UP;
664 dev->iflink = real_dev->ifindex;
665 dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
666 (1<<__LINK_STATE_DORMANT))) |
667 (1<<__LINK_STATE_PRESENT);
668
669 /* ipv6 shared card related stuff */
670 dev->dev_id = real_dev->dev_id;
671
672 if (is_zero_ether_addr(dev->dev_addr))
673 memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
674 if (is_zero_ether_addr(dev->broadcast))
675 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
676
677 if (real_dev->features & NETIF_F_HW_VLAN_TX) {
678 dev->header_ops = real_dev->header_ops;
679 dev->hard_header_len = real_dev->hard_header_len;
680 dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
681 } else {
682 dev->header_ops = &vlan_header_ops;
683 dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
684 dev->hard_start_xmit = vlan_dev_hard_start_xmit;
685 }
686
687 if (real_dev->priv_flags & IFF_802_1Q_VLAN)
688 subclass = 1;
689
690 lockdep_set_class_and_subclass(&dev->_xmit_lock,
691 &vlan_netdev_xmit_lock_key, subclass);
692 return 0;
693}
694
695void vlan_setup(struct net_device *dev)
704{ 696{
705 dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev); 697 ether_setup(dev);
698
699 dev->priv_flags |= IFF_802_1Q_VLAN;
700 dev->tx_queue_len = 0;
701
702 dev->change_mtu = vlan_dev_change_mtu;
703 dev->init = vlan_dev_init;
704 dev->open = vlan_dev_open;
705 dev->stop = vlan_dev_stop;
706 dev->set_mac_address = vlan_dev_set_mac_address;
707 dev->set_rx_mode = vlan_dev_set_rx_mode;
708 dev->set_multicast_list = vlan_dev_set_rx_mode;
709 dev->change_rx_flags = vlan_dev_change_rx_flags;
710 dev->do_ioctl = vlan_dev_ioctl;
711 dev->destructor = free_netdev;
712
713 memset(dev->broadcast, 0, ETH_ALEN);
706} 714}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0996185e2ed5..e32eeb37987e 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -75,7 +75,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
75static int vlan_changelink(struct net_device *dev, 75static int vlan_changelink(struct net_device *dev,
76 struct nlattr *tb[], struct nlattr *data[]) 76 struct nlattr *tb[], struct nlattr *data[])
77{ 77{
78 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 78 struct vlan_dev_info *vlan = vlan_dev_info(dev);
79 struct ifla_vlan_flags *flags; 79 struct ifla_vlan_flags *flags;
80 struct ifla_vlan_qos_mapping *m; 80 struct ifla_vlan_qos_mapping *m;
81 struct nlattr *attr; 81 struct nlattr *attr;
@@ -104,7 +104,7 @@ static int vlan_changelink(struct net_device *dev,
104static int vlan_newlink(struct net_device *dev, 104static int vlan_newlink(struct net_device *dev,
105 struct nlattr *tb[], struct nlattr *data[]) 105 struct nlattr *tb[], struct nlattr *data[])
106{ 106{
107 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 107 struct vlan_dev_info *vlan = vlan_dev_info(dev);
108 struct net_device *real_dev; 108 struct net_device *real_dev;
109 int err; 109 int err;
110 110
@@ -137,11 +137,6 @@ static int vlan_newlink(struct net_device *dev,
137 return register_vlan_dev(dev); 137 return register_vlan_dev(dev);
138} 138}
139 139
140static void vlan_dellink(struct net_device *dev)
141{
142 unregister_vlan_device(dev);
143}
144
145static inline size_t vlan_qos_map_size(unsigned int n) 140static inline size_t vlan_qos_map_size(unsigned int n)
146{ 141{
147 if (n == 0) 142 if (n == 0)
@@ -153,7 +148,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
153 148
154static size_t vlan_get_size(const struct net_device *dev) 149static size_t vlan_get_size(const struct net_device *dev)
155{ 150{
156 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 151 struct vlan_dev_info *vlan = vlan_dev_info(dev);
157 152
158 return nla_total_size(2) + /* IFLA_VLAN_ID */ 153 return nla_total_size(2) + /* IFLA_VLAN_ID */
159 vlan_qos_map_size(vlan->nr_ingress_mappings) + 154 vlan_qos_map_size(vlan->nr_ingress_mappings) +
@@ -162,14 +157,14 @@ static size_t vlan_get_size(const struct net_device *dev)
162 157
163static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) 158static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
164{ 159{
165 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 160 struct vlan_dev_info *vlan = vlan_dev_info(dev);
166 struct vlan_priority_tci_mapping *pm; 161 struct vlan_priority_tci_mapping *pm;
167 struct ifla_vlan_flags f; 162 struct ifla_vlan_flags f;
168 struct ifla_vlan_qos_mapping m; 163 struct ifla_vlan_qos_mapping m;
169 struct nlattr *nest; 164 struct nlattr *nest;
170 unsigned int i; 165 unsigned int i;
171 166
172 NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id); 167 NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
173 if (vlan->flags) { 168 if (vlan->flags) {
174 f.flags = vlan->flags; 169 f.flags = vlan->flags;
175 f.mask = ~0; 170 f.mask = ~0;
@@ -226,7 +221,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
226 .validate = vlan_validate, 221 .validate = vlan_validate,
227 .newlink = vlan_newlink, 222 .newlink = vlan_newlink,
228 .changelink = vlan_changelink, 223 .changelink = vlan_changelink,
229 .dellink = vlan_dellink, 224 .dellink = unregister_vlan_dev,
230 .get_size = vlan_get_size, 225 .get_size = vlan_get_size,
231 .fill_info = vlan_fill_info, 226 .fill_info = vlan_fill_info,
232}; 227};
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 6cefdf8e381a..a0ec47925597 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -125,10 +125,10 @@ static struct proc_dir_entry *proc_vlan_conf;
125 125
126/* Strings */ 126/* Strings */
127static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { 127static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
128 [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID", 128 [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID",
129 [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD", 129 [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD",
130 [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD]= "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD", 130 [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD",
131 [VLAN_NAME_TYPE_PLUS_VID] = "VLAN_NAME_TYPE_PLUS_VID", 131 [VLAN_NAME_TYPE_PLUS_VID] = "VLAN_NAME_TYPE_PLUS_VID",
132}; 132};
133/* 133/*
134 * Interface functions 134 * Interface functions
@@ -158,15 +158,18 @@ void vlan_proc_cleanup(void)
158int __init vlan_proc_init(void) 158int __init vlan_proc_init(void)
159{ 159{
160 proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); 160 proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net);
161 if (proc_vlan_dir) { 161 if (!proc_vlan_dir)
162 proc_vlan_conf = create_proc_entry(name_conf, 162 goto err;
163 S_IFREG|S_IRUSR|S_IWUSR, 163
164 proc_vlan_dir); 164 proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
165 if (proc_vlan_conf) { 165 proc_vlan_dir);
166 proc_vlan_conf->proc_fops = &vlan_fops; 166 if (!proc_vlan_conf)
167 return 0; 167 goto err;
168 } 168 proc_vlan_conf->proc_fops = &vlan_fops;
169 } 169 return 0;
170
171err:
172 pr_err("%s: can't create entry in proc filesystem!\n", __FUNCTION__);
170 vlan_proc_cleanup(); 173 vlan_proc_cleanup();
171 return -ENOBUFS; 174 return -ENOBUFS;
172} 175}
@@ -175,16 +178,9 @@ int __init vlan_proc_init(void)
175 * Add directory entry for VLAN device. 178 * Add directory entry for VLAN device.
176 */ 179 */
177 180
178int vlan_proc_add_dev (struct net_device *vlandev) 181int vlan_proc_add_dev(struct net_device *vlandev)
179{ 182{
180 struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 183 struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
181
182 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
183 printk(KERN_ERR
184 "ERROR: vlan_proc_add, device -:%s:- is NOT a VLAN\n",
185 vlandev->name);
186 return -EINVAL;
187 }
188 184
189 dev_info->dent = create_proc_entry(vlandev->name, 185 dev_info->dent = create_proc_entry(vlandev->name,
190 S_IFREG|S_IRUSR|S_IWUSR, 186 S_IFREG|S_IRUSR|S_IWUSR,
@@ -194,11 +190,6 @@ int vlan_proc_add_dev (struct net_device *vlandev)
194 190
195 dev_info->dent->proc_fops = &vlandev_fops; 191 dev_info->dent->proc_fops = &vlandev_fops;
196 dev_info->dent->data = vlandev; 192 dev_info->dent->data = vlandev;
197
198#ifdef VLAN_DEBUG
199 printk(KERN_ERR "vlan_proc_add, device -:%s:- being added.\n",
200 vlandev->name);
201#endif
202 return 0; 193 return 0;
203} 194}
204 195
@@ -207,28 +198,12 @@ int vlan_proc_add_dev (struct net_device *vlandev)
207 */ 198 */
208int vlan_proc_rem_dev(struct net_device *vlandev) 199int vlan_proc_rem_dev(struct net_device *vlandev)
209{ 200{
210 if (!vlandev) {
211 printk(VLAN_ERR "%s: invalid argument: %p\n",
212 __FUNCTION__, vlandev);
213 return -EINVAL;
214 }
215
216 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
217 printk(VLAN_DBG "%s: invalid argument, device: %s is not a VLAN device, priv_flags: 0x%4hX.\n",
218 __FUNCTION__, vlandev->name, vlandev->priv_flags);
219 return -EINVAL;
220 }
221
222#ifdef VLAN_DEBUG
223 printk(VLAN_DBG "%s: dev: %p\n", __FUNCTION__, vlandev);
224#endif
225
226 /** NOTE: This will consume the memory pointed to by dent, it seems. */ 201 /** NOTE: This will consume the memory pointed to by dent, it seems. */
227 if (VLAN_DEV_INFO(vlandev)->dent) { 202 if (vlan_dev_info(vlandev)->dent) {
228 remove_proc_entry(VLAN_DEV_INFO(vlandev)->dent->name, proc_vlan_dir); 203 remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
229 VLAN_DEV_INFO(vlandev)->dent = NULL; 204 proc_vlan_dir);
205 vlan_dev_info(vlandev)->dent = NULL;
230 } 206 }
231
232 return 0; 207 return 0;
233} 208}
234 209
@@ -245,6 +220,7 @@ static inline int is_vlan_dev(struct net_device *dev)
245 220
246/* start read of /proc/net/vlan/config */ 221/* start read of /proc/net/vlan/config */
247static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) 222static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
223 __acquires(dev_base_lock)
248{ 224{
249 struct net_device *dev; 225 struct net_device *dev;
250 loff_t i = 1; 226 loff_t i = 1;
@@ -286,6 +262,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
286} 262}
287 263
288static void vlan_seq_stop(struct seq_file *seq, void *v) 264static void vlan_seq_stop(struct seq_file *seq, void *v)
265 __releases(dev_base_lock)
289{ 266{
290 read_unlock(&dev_base_lock); 267 read_unlock(&dev_base_lock);
291} 268}
@@ -301,10 +278,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
301 nmtype = vlan_name_type_str[vlan_name_type]; 278 nmtype = vlan_name_type_str[vlan_name_type];
302 279
303 seq_printf(seq, "Name-Type: %s\n", 280 seq_printf(seq, "Name-Type: %s\n",
304 nmtype ? nmtype : "UNKNOWN" ); 281 nmtype ? nmtype : "UNKNOWN");
305 } else { 282 } else {
306 const struct net_device *vlandev = v; 283 const struct net_device *vlandev = v;
307 const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 284 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
308 285
309 seq_printf(seq, "%-15s| %d | %s\n", vlandev->name, 286 seq_printf(seq, "%-15s| %d | %s\n", vlandev->name,
310 dev_info->vlan_id, dev_info->real_dev->name); 287 dev_info->vlan_id, dev_info->real_dev->name);
@@ -315,20 +292,18 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
315static int vlandev_seq_show(struct seq_file *seq, void *offset) 292static int vlandev_seq_show(struct seq_file *seq, void *offset)
316{ 293{
317 struct net_device *vlandev = (struct net_device *) seq->private; 294 struct net_device *vlandev = (struct net_device *) seq->private;
318 const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 295 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
319 struct net_device_stats *stats; 296 struct net_device_stats *stats = &vlandev->stats;
320 static const char fmt[] = "%30s %12lu\n"; 297 static const char fmt[] = "%30s %12lu\n";
321 int i; 298 int i;
322 299
323 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) 300 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN))
324 return 0; 301 return 0;
325 302
326 seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", 303 seq_printf(seq,
327 vlandev->name, dev_info->vlan_id, 304 "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n",
328 (int)(dev_info->flags & 1), vlandev->priv_flags); 305 vlandev->name, dev_info->vlan_id,
329 306 (int)(dev_info->flags & 1), vlandev->priv_flags);
330
331 stats = vlan_dev_get_stats(vlandev);
332 307
333 seq_printf(seq, fmt, "total frames received", stats->rx_packets); 308 seq_printf(seq, fmt, "total frames received", stats->rx_packets);
334 seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); 309 seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
@@ -342,16 +317,16 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
342 dev_info->cnt_encap_on_xmit); 317 dev_info->cnt_encap_on_xmit);
343 seq_printf(seq, "Device: %s", dev_info->real_dev->name); 318 seq_printf(seq, "Device: %s", dev_info->real_dev->name);
344 /* now show all PRIORITY mappings relating to this VLAN */ 319 /* now show all PRIORITY mappings relating to this VLAN */
345 seq_printf(seq, 320 seq_printf(seq, "\nINGRESS priority mappings: "
346 "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", 321 "0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n",
347 dev_info->ingress_priority_map[0], 322 dev_info->ingress_priority_map[0],
348 dev_info->ingress_priority_map[1], 323 dev_info->ingress_priority_map[1],
349 dev_info->ingress_priority_map[2], 324 dev_info->ingress_priority_map[2],
350 dev_info->ingress_priority_map[3], 325 dev_info->ingress_priority_map[3],
351 dev_info->ingress_priority_map[4], 326 dev_info->ingress_priority_map[4],
352 dev_info->ingress_priority_map[5], 327 dev_info->ingress_priority_map[5],
353 dev_info->ingress_priority_map[6], 328 dev_info->ingress_priority_map[6],
354 dev_info->ingress_priority_map[7]); 329 dev_info->ingress_priority_map[7]);
355 330
356 seq_printf(seq, "EGRESSS priority Mappings: "); 331 seq_printf(seq, "EGRESSS priority Mappings: ");
357 for (i = 0; i < 16; i++) { 332 for (i = 0; i < 16; i++) {
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index f908ee332fd8..da542cacc5a5 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -4,16 +4,15 @@
4#ifdef CONFIG_PROC_FS 4#ifdef CONFIG_PROC_FS
5int vlan_proc_init(void); 5int vlan_proc_init(void);
6int vlan_proc_rem_dev(struct net_device *vlandev); 6int vlan_proc_rem_dev(struct net_device *vlandev);
7int vlan_proc_add_dev (struct net_device *vlandev); 7int vlan_proc_add_dev(struct net_device *vlandev);
8void vlan_proc_cleanup (void); 8void vlan_proc_cleanup(void);
9 9
10#else /* No CONFIG_PROC_FS */ 10#else /* No CONFIG_PROC_FS */
11 11
12#define vlan_proc_init() (0) 12#define vlan_proc_init() (0)
13#define vlan_proc_cleanup() do {} while(0) 13#define vlan_proc_cleanup() do {} while (0)
14#define vlan_proc_add_dev(dev) ({(void)(dev), 0;}) 14#define vlan_proc_add_dev(dev) ({(void)(dev), 0; })
15#define vlan_proc_rem_dev(dev) ({(void)(dev), 0;}) 15#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; })
16
17#endif 16#endif
18 17
19#endif /* !(__BEN_VLAN_PROC_INC__) */ 18#endif /* !(__BEN_VLAN_PROC_INC__) */
diff --git a/net/9p/conv.c b/net/9p/conv.c
index aa2aa9884f95..3fe35d532c87 100644
--- a/net/9p/conv.c
+++ b/net/9p/conv.c
@@ -128,11 +128,6 @@ static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
128 return ret; 128 return ret;
129} 129}
130 130
131static inline void buf_put_string(struct cbuf *buf, const char *s)
132{
133 buf_put_stringn(buf, s, strlen(s));
134}
135
136static u8 buf_get_int8(struct cbuf *buf) 131static u8 buf_get_int8(struct cbuf *buf)
137{ 132{
138 u8 ret = 0; 133 u8 ret = 0;
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 41d70f47375d..8f9763a9dc12 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -76,9 +76,9 @@ struct p9_trans_module *v9fs_match_trans(const substring_t *name)
76 list_for_each(p, &v9fs_trans_list) { 76 list_for_each(p, &v9fs_trans_list) {
77 t = list_entry(p, struct p9_trans_module, list); 77 t = list_entry(p, struct p9_trans_module, list);
78 if (strncmp(t->name, name->from, name->to-name->from) == 0) 78 if (strncmp(t->name, name->from, name->to-name->from) == 0)
79 break; 79 return t;
80 } 80 }
81 return t; 81 return NULL;
82} 82}
83EXPORT_SYMBOL(v9fs_match_trans); 83EXPORT_SYMBOL(v9fs_match_trans);
84 84
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 30269a4ff22a..62332ed9da4a 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -62,13 +62,14 @@ struct p9_trans_fd {
62 62
63enum { 63enum {
64 /* Options that take integer arguments */ 64 /* Options that take integer arguments */
65 Opt_port, Opt_rfdno, Opt_wfdno, 65 Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
66}; 66};
67 67
68static match_table_t tokens = { 68static match_table_t tokens = {
69 {Opt_port, "port=%u"}, 69 {Opt_port, "port=%u"},
70 {Opt_rfdno, "rfdno=%u"}, 70 {Opt_rfdno, "rfdno=%u"},
71 {Opt_wfdno, "wfdno=%u"}, 71 {Opt_wfdno, "wfdno=%u"},
72 {Opt_err, NULL},
72}; 73};
73 74
74/** 75/**
diff --git a/net/Kconfig b/net/Kconfig
index ab4e6da5012f..b6a5d454f2ff 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -144,9 +144,21 @@ config NETFILTER_DEBUG
144 You can say Y here if you want to get additional messages useful in 144 You can say Y here if you want to get additional messages useful in
145 debugging the netfilter code. 145 debugging the netfilter code.
146 146
147config NETFILTER_ADVANCED
148 bool "Advanced netfilter configuration"
149 depends on NETFILTER
150 default y
151 help
152 If you say Y here you can select between all the netfilter modules.
153 If you say N the more ununsual ones will not be shown and the
154 basic ones needed by most people will default to 'M'.
155
156 If unsure, say Y.
157
147config BRIDGE_NETFILTER 158config BRIDGE_NETFILTER
148 bool "Bridged IP/ARP packets filtering" 159 bool "Bridged IP/ARP packets filtering"
149 depends on BRIDGE && NETFILTER && INET 160 depends on BRIDGE && NETFILTER && INET
161 depends on NETFILTER_ADVANCED
150 default y 162 default y
151 ---help--- 163 ---help---
152 Enabling this option will let arptables resp. iptables see bridged 164 Enabling this option will let arptables resp. iptables see bridged
@@ -218,6 +230,7 @@ endmenu
218endmenu 230endmenu
219 231
220source "net/ax25/Kconfig" 232source "net/ax25/Kconfig"
233source "net/can/Kconfig"
221source "net/irda/Kconfig" 234source "net/irda/Kconfig"
222source "net/bluetooth/Kconfig" 235source "net/bluetooth/Kconfig"
223source "net/rxrpc/Kconfig" 236source "net/rxrpc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bbe7d2a41486..b7a13643b549 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_LAPB) += lapb/
34obj-$(CONFIG_NETROM) += netrom/ 34obj-$(CONFIG_NETROM) += netrom/
35obj-$(CONFIG_ROSE) += rose/ 35obj-$(CONFIG_ROSE) += rose/
36obj-$(CONFIG_AX25) += ax25/ 36obj-$(CONFIG_AX25) += ax25/
37obj-$(CONFIG_CAN) += can/
37obj-$(CONFIG_IRDA) += irda/ 38obj-$(CONFIG_IRDA) += irda/
38obj-$(CONFIG_BT) += bluetooth/ 39obj-$(CONFIG_BT) += bluetooth/
39obj-$(CONFIG_SUNRPC) += sunrpc/ 40obj-$(CONFIG_SUNRPC) += sunrpc/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 6c5c6dc098ec..18058bbc7962 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -874,9 +874,7 @@ void __init aarp_proto_init(void)
874 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); 874 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
875 if (!aarp_dl) 875 if (!aarp_dl)
876 printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); 876 printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
877 init_timer(&aarp_timer); 877 setup_timer(&aarp_timer, aarp_expire_timeout, 0);
878 aarp_timer.function = aarp_expire_timeout;
879 aarp_timer.data = 0;
880 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; 878 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
881 add_timer(&aarp_timer); 879 add_timer(&aarp_timer);
882 register_netdevice_notifier(&aarp_notifier); 880 register_netdevice_notifier(&aarp_notifier);
@@ -943,6 +941,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
943} 941}
944 942
945static void *aarp_seq_start(struct seq_file *seq, loff_t *pos) 943static void *aarp_seq_start(struct seq_file *seq, loff_t *pos)
944 __acquires(aarp_lock)
946{ 945{
947 struct aarp_iter_state *iter = seq->private; 946 struct aarp_iter_state *iter = seq->private;
948 947
@@ -977,6 +976,7 @@ static void *aarp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
977} 976}
978 977
979static void aarp_seq_stop(struct seq_file *seq, void *v) 978static void aarp_seq_stop(struct seq_file *seq, void *v)
979 __releases(aarp_lock)
980{ 980{
981 read_unlock_bh(&aarp_lock); 981 read_unlock_bh(&aarp_lock);
982} 982}
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 05d9652afcb6..8e8dcfd532db 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -27,6 +27,7 @@ static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
27} 27}
28 28
29static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos) 29static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos)
30 __acquires(atalk_interfaces_lock)
30{ 31{
31 loff_t l = *pos; 32 loff_t l = *pos;
32 33
@@ -52,6 +53,7 @@ out:
52} 53}
53 54
54static void atalk_seq_interface_stop(struct seq_file *seq, void *v) 55static void atalk_seq_interface_stop(struct seq_file *seq, void *v)
56 __releases(atalk_interfaces_lock)
55{ 57{
56 read_unlock_bh(&atalk_interfaces_lock); 58 read_unlock_bh(&atalk_interfaces_lock);
57} 59}
@@ -86,6 +88,7 @@ static __inline__ struct atalk_route *atalk_get_route_idx(loff_t pos)
86} 88}
87 89
88static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos) 90static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos)
91 __acquires(atalk_routes_lock)
89{ 92{
90 loff_t l = *pos; 93 loff_t l = *pos;
91 94
@@ -111,6 +114,7 @@ out:
111} 114}
112 115
113static void atalk_seq_route_stop(struct seq_file *seq, void *v) 116static void atalk_seq_route_stop(struct seq_file *seq, void *v)
117 __releases(atalk_routes_lock)
114{ 118{
115 read_unlock_bh(&atalk_routes_lock); 119 read_unlock_bh(&atalk_routes_lock);
116} 120}
@@ -154,6 +158,7 @@ found:
154} 158}
155 159
156static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos) 160static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos)
161 __acquires(atalk_sockets_lock)
157{ 162{
158 loff_t l = *pos; 163 loff_t l = *pos;
159 164
@@ -176,6 +181,7 @@ out:
176} 181}
177 182
178static void atalk_seq_socket_stop(struct seq_file *seq, void *v) 183static void atalk_seq_socket_stop(struct seq_file *seq, void *v)
184 __releases(atalk_sockets_lock)
179{ 185{
180 read_unlock_bh(&atalk_sockets_lock); 186 read_unlock_bh(&atalk_sockets_lock);
181} 187}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7c0b5151d526..3be55c8ca4ef 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -177,10 +177,9 @@ static inline void atalk_destroy_socket(struct sock *sk)
177 177
178 if (atomic_read(&sk->sk_wmem_alloc) || 178 if (atomic_read(&sk->sk_wmem_alloc) ||
179 atomic_read(&sk->sk_rmem_alloc)) { 179 atomic_read(&sk->sk_rmem_alloc)) {
180 init_timer(&sk->sk_timer); 180 setup_timer(&sk->sk_timer, atalk_destroy_timer,
181 (unsigned long)sk);
181 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; 182 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
182 sk->sk_timer.function = atalk_destroy_timer;
183 sk->sk_timer.data = (unsigned long)sk;
184 add_timer(&sk->sk_timer); 183 add_timer(&sk->sk_timer);
185 } else 184 } else
186 sock_put(sk); 185 sock_put(sk);
@@ -1044,7 +1043,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol)
1044 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 1043 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
1045 goto out; 1044 goto out;
1046 rc = -ENOMEM; 1045 rc = -ENOMEM;
1047 sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); 1046 sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto);
1048 if (!sk) 1047 if (!sk)
1049 goto out; 1048 goto out;
1050 rc = 0; 1049 rc = 0;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index 7df1778e221a..621805dfa2f4 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -49,31 +49,17 @@ static struct ctl_table atalk_table[] = {
49 { 0 }, 49 { 0 },
50}; 50};
51 51
52static struct ctl_table atalk_dir_table[] = { 52static struct ctl_path atalk_path[] = {
53 { 53 { .procname = "net", .ctl_name = CTL_NET, },
54 .ctl_name = NET_ATALK, 54 { .procname = "appletalk", .ctl_name = NET_ATALK, },
55 .procname = "appletalk", 55 { }
56 .mode = 0555,
57 .child = atalk_table,
58 },
59 { 0 },
60};
61
62static struct ctl_table atalk_root_table[] = {
63 {
64 .ctl_name = CTL_NET,
65 .procname = "net",
66 .mode = 0555,
67 .child = atalk_dir_table,
68 },
69 { 0 },
70}; 56};
71 57
72static struct ctl_table_header *atalk_table_header; 58static struct ctl_table_header *atalk_table_header;
73 59
74void atalk_register_sysctl(void) 60void atalk_register_sysctl(void)
75{ 61{
76 atalk_table_header = register_sysctl_table(atalk_root_table); 62 atalk_table_header = register_sysctl_paths(atalk_path, atalk_table);
77} 63}
78 64
79void atalk_unregister_sysctl(void) 65void atalk_unregister_sysctl(void)
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 21ff276b2d80..754ea103b378 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -1,10 +1,9 @@
1# 1#
2# Asynchronous Transfer Mode (ATM) (EXPERIMENTAL) 2# Asynchronous Transfer Mode (ATM)
3# 3#
4 4
5config ATM 5config ATM
6 tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)" 6 tristate "Asynchronous Transfer Mode (ATM)"
7 depends on EXPERIMENTAL
8 ---help--- 7 ---help---
9 ATM is a high-speed networking technology for Local Area Networks 8 ATM is a high-speed networking technology for Local Area Networks
10 and Wide Area Networks. It uses a fixed packet size and is 9 and Wide Area Networks. It uses a fixed packet size and is
@@ -20,7 +19,7 @@ config ATM
20 further details. 19 further details.
21 20
22config ATM_CLIP 21config ATM_CLIP
23 tristate "Classical IP over ATM (EXPERIMENTAL)" 22 tristate "Classical IP over ATM"
24 depends on ATM && INET 23 depends on ATM && INET
25 help 24 help
26 Classical IP over ATM for PVCs and SVCs, supporting InARP and 25 Classical IP over ATM for PVCs and SVCs, supporting InARP and
@@ -29,7 +28,7 @@ config ATM_CLIP
29 (LANE)" below. 28 (LANE)" below.
30 29
31config ATM_CLIP_NO_ICMP 30config ATM_CLIP_NO_ICMP
32 bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)" 31 bool "Do NOT send ICMP if no neighbour"
33 depends on ATM_CLIP 32 depends on ATM_CLIP
34 help 33 help
35 Normally, an "ICMP host unreachable" message is sent if a neighbour 34 Normally, an "ICMP host unreachable" message is sent if a neighbour
@@ -39,7 +38,7 @@ config ATM_CLIP_NO_ICMP
39 such neighbours are silently discarded instead. 38 such neighbours are silently discarded instead.
40 39
41config ATM_LANE 40config ATM_LANE
42 tristate "LAN Emulation (LANE) support (EXPERIMENTAL)" 41 tristate "LAN Emulation (LANE) support"
43 depends on ATM 42 depends on ATM
44 help 43 help
45 LAN Emulation emulates services of existing LANs across an ATM 44 LAN Emulation emulates services of existing LANs across an ATM
@@ -48,7 +47,7 @@ config ATM_LANE
48 ELAN and Ethernet segments. You need LANE if you want to try MPOA. 47 ELAN and Ethernet segments. You need LANE if you want to try MPOA.
49 48
50config ATM_MPOA 49config ATM_MPOA
51 tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)" 50 tristate "Multi-Protocol Over ATM (MPOA) support"
52 depends on ATM && INET && ATM_LANE!=n 51 depends on ATM && INET && ATM_LANE!=n
53 help 52 help
54 Multi-Protocol Over ATM allows ATM edge devices such as routers, 53 Multi-Protocol Over ATM allows ATM edge devices such as routers,
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 9ef07eda2c43..1b88311f2130 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -9,13 +9,15 @@
9 9
10#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev) 10#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
11 11
12static ssize_t show_type(struct class_device *cdev, char *buf) 12static ssize_t show_type(struct device *cdev,
13 struct device_attribute *attr, char *buf)
13{ 14{
14 struct atm_dev *adev = to_atm_dev(cdev); 15 struct atm_dev *adev = to_atm_dev(cdev);
15 return sprintf(buf, "%s\n", adev->type); 16 return sprintf(buf, "%s\n", adev->type);
16} 17}
17 18
18static ssize_t show_address(struct class_device *cdev, char *buf) 19static ssize_t show_address(struct device *cdev,
20 struct device_attribute *attr, char *buf)
19{ 21{
20 char *pos = buf; 22 char *pos = buf;
21 struct atm_dev *adev = to_atm_dev(cdev); 23 struct atm_dev *adev = to_atm_dev(cdev);
@@ -28,7 +30,8 @@ static ssize_t show_address(struct class_device *cdev, char *buf)
28 return pos - buf; 30 return pos - buf;
29} 31}
30 32
31static ssize_t show_atmaddress(struct class_device *cdev, char *buf) 33static ssize_t show_atmaddress(struct device *cdev,
34 struct device_attribute *attr, char *buf)
32{ 35{
33 unsigned long flags; 36 unsigned long flags;
34 char *pos = buf; 37 char *pos = buf;
@@ -54,7 +57,8 @@ static ssize_t show_atmaddress(struct class_device *cdev, char *buf)
54 return pos - buf; 57 return pos - buf;
55} 58}
56 59
57static ssize_t show_carrier(struct class_device *cdev, char *buf) 60static ssize_t show_carrier(struct device *cdev,
61 struct device_attribute *attr, char *buf)
58{ 62{
59 char *pos = buf; 63 char *pos = buf;
60 struct atm_dev *adev = to_atm_dev(cdev); 64 struct atm_dev *adev = to_atm_dev(cdev);
@@ -65,7 +69,8 @@ static ssize_t show_carrier(struct class_device *cdev, char *buf)
65 return pos - buf; 69 return pos - buf;
66} 70}
67 71
68static ssize_t show_link_rate(struct class_device *cdev, char *buf) 72static ssize_t show_link_rate(struct device *cdev,
73 struct device_attribute *attr, char *buf)
69{ 74{
70 char *pos = buf; 75 char *pos = buf;
71 struct atm_dev *adev = to_atm_dev(cdev); 76 struct atm_dev *adev = to_atm_dev(cdev);
@@ -90,22 +95,23 @@ static ssize_t show_link_rate(struct class_device *cdev, char *buf)
90 return pos - buf; 95 return pos - buf;
91} 96}
92 97
93static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); 98static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
94static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL); 99static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
95static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); 100static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
96static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); 101static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
97static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL); 102static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
98 103
99static struct class_device_attribute *atm_attrs[] = { 104static struct device_attribute *atm_attrs[] = {
100 &class_device_attr_atmaddress, 105 &dev_attr_atmaddress,
101 &class_device_attr_address, 106 &dev_attr_address,
102 &class_device_attr_carrier, 107 &dev_attr_carrier,
103 &class_device_attr_type, 108 &dev_attr_type,
104 &class_device_attr_link_rate, 109 &dev_attr_link_rate,
105 NULL 110 NULL
106}; 111};
107 112
108static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env) 113
114static int atm_uevent(struct device *cdev, struct kobj_uevent_env *env)
109{ 115{
110 struct atm_dev *adev; 116 struct atm_dev *adev;
111 117
@@ -122,7 +128,7 @@ static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
122 return 0; 128 return 0;
123} 129}
124 130
125static void atm_release(struct class_device *cdev) 131static void atm_release(struct device *cdev)
126{ 132{
127 struct atm_dev *adev = to_atm_dev(cdev); 133 struct atm_dev *adev = to_atm_dev(cdev);
128 134
@@ -131,25 +137,25 @@ static void atm_release(struct class_device *cdev)
131 137
132static struct class atm_class = { 138static struct class atm_class = {
133 .name = "atm", 139 .name = "atm",
134 .release = atm_release, 140 .dev_release = atm_release,
135 .uevent = atm_uevent, 141 .dev_uevent = atm_uevent,
136}; 142};
137 143
138int atm_register_sysfs(struct atm_dev *adev) 144int atm_register_sysfs(struct atm_dev *adev)
139{ 145{
140 struct class_device *cdev = &adev->class_dev; 146 struct device *cdev = &adev->class_dev;
141 int i, j, err; 147 int i, j, err;
142 148
143 cdev->class = &atm_class; 149 cdev->class = &atm_class;
144 class_set_devdata(cdev, adev); 150 dev_set_drvdata(cdev, adev);
145 151
146 snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); 152 snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
147 err = class_device_register(cdev); 153 err = device_register(cdev);
148 if (err < 0) 154 if (err < 0)
149 return err; 155 return err;
150 156
151 for (i = 0; atm_attrs[i]; i++) { 157 for (i = 0; atm_attrs[i]; i++) {
152 err = class_device_create_file(cdev, atm_attrs[i]); 158 err = device_create_file(cdev, atm_attrs[i]);
153 if (err) 159 if (err)
154 goto err_out; 160 goto err_out;
155 } 161 }
@@ -158,16 +164,16 @@ int atm_register_sysfs(struct atm_dev *adev)
158 164
159err_out: 165err_out:
160 for (j = 0; j < i; j++) 166 for (j = 0; j < i; j++)
161 class_device_remove_file(cdev, atm_attrs[j]); 167 device_remove_file(cdev, atm_attrs[j]);
162 class_device_del(cdev); 168 device_del(cdev);
163 return err; 169 return err;
164} 170}
165 171
166void atm_unregister_sysfs(struct atm_dev *adev) 172void atm_unregister_sysfs(struct atm_dev *adev)
167{ 173{
168 struct class_device *cdev = &adev->class_dev; 174 struct device *cdev = &adev->class_dev;
169 175
170 class_device_del(cdev); 176 device_del(cdev);
171} 177}
172 178
173int __init atm_sysfs_init(void) 179int __init atm_sysfs_init(void)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ba6428f204f9..574d9a964176 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -1,8 +1,10 @@
1/* 1/*
2Experimental ethernet netdevice using ATM AAL5 as underlying carrier 2 * Ethernet netdevice using ATM AAL5 as underlying carrier
3(RFC1483 obsoleted by RFC2684) for Linux 2.4 3 * (RFC1483 obsoleted by RFC2684) for Linux
4Author: Marcell GAL, 2000, XDSL Ltd, Hungary 4 *
5*/ 5 * Authors: Marcell GAL, 2000, XDSL Ltd, Hungary
6 * Eric Kinzie, 2006-2007, US Naval Research Laboratory
7 */
6 8
7#include <linux/module.h> 9#include <linux/module.h>
8#include <linux/init.h> 10#include <linux/init.h>
@@ -39,21 +41,35 @@ static void skb_debug(const struct sk_buff *skb)
39#define skb_debug(skb) do {} while (0) 41#define skb_debug(skb) do {} while (0)
40#endif 42#endif
41 43
44#define BR2684_ETHERTYPE_LEN 2
45#define BR2684_PAD_LEN 2
46
47#define LLC 0xaa, 0xaa, 0x03
48#define SNAP_BRIDGED 0x00, 0x80, 0xc2
49#define SNAP_ROUTED 0x00, 0x00, 0x00
50#define PID_ETHERNET 0x00, 0x07
51#define ETHERTYPE_IPV4 0x08, 0x00
52#define ETHERTYPE_IPV6 0x86, 0xdd
53#define PAD_BRIDGED 0x00, 0x00
54
55static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
56static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
42static unsigned char llc_oui_pid_pad[] = 57static unsigned char llc_oui_pid_pad[] =
43 { 0xAA, 0xAA, 0x03, 0x00, 0x80, 0xC2, 0x00, 0x07, 0x00, 0x00 }; 58 { LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
44#define PADLEN (2) 59static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
60static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
45 61
46enum br2684_encaps { 62enum br2684_encaps {
47 e_vc = BR2684_ENCAPS_VC, 63 e_vc = BR2684_ENCAPS_VC,
48 e_llc = BR2684_ENCAPS_LLC, 64 e_llc = BR2684_ENCAPS_LLC,
49}; 65};
50 66
51struct br2684_vcc { 67struct br2684_vcc {
52 struct atm_vcc *atmvcc; 68 struct atm_vcc *atmvcc;
53 struct net_device *device; 69 struct net_device *device;
54 /* keep old push,pop functions for chaining */ 70 /* keep old push, pop functions for chaining */
55 void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); 71 void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
56 /* void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); */ 72 /* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
57 enum br2684_encaps encaps; 73 enum br2684_encaps encaps;
58 struct list_head brvccs; 74 struct list_head brvccs;
59#ifdef CONFIG_ATM_BR2684_IPFILTER 75#ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -66,9 +82,10 @@ struct br2684_dev {
66 struct net_device *net_dev; 82 struct net_device *net_dev;
67 struct list_head br2684_devs; 83 struct list_head br2684_devs;
68 int number; 84 int number;
69 struct list_head brvccs; /* one device <=> one vcc (before xmas) */ 85 struct list_head brvccs; /* one device <=> one vcc (before xmas) */
70 struct net_device_stats stats; 86 struct net_device_stats stats;
71 int mac_was_set; 87 int mac_was_set;
88 enum br2684_payload payload;
72}; 89};
73 90
74/* 91/*
@@ -84,7 +101,7 @@ static LIST_HEAD(br2684_devs);
84 101
85static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) 102static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
86{ 103{
87 return (struct br2684_dev *) net_dev->priv; 104 return (struct br2684_dev *)net_dev->priv;
88} 105}
89 106
90static inline struct net_device *list_entry_brdev(const struct list_head *le) 107static inline struct net_device *list_entry_brdev(const struct list_head *le)
@@ -94,7 +111,7 @@ static inline struct net_device *list_entry_brdev(const struct list_head *le)
94 111
95static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc) 112static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc)
96{ 113{
97 return (struct br2684_vcc *) (atmvcc->user_back); 114 return (struct br2684_vcc *)(atmvcc->user_back);
98} 115}
99 116
100static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le) 117static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le)
@@ -132,10 +149,11 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
132 * otherwise false 149 * otherwise false
133 */ 150 */
134static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, 151static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
135 struct br2684_vcc *brvcc) 152 struct br2684_vcc *brvcc)
136{ 153{
137 struct atm_vcc *atmvcc; 154 struct atm_vcc *atmvcc;
138 int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; 155 int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
156
139 if (skb_headroom(skb) < minheadroom) { 157 if (skb_headroom(skb) < minheadroom) {
140 struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom); 158 struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
141 brvcc->copies_needed++; 159 brvcc->copies_needed++;
@@ -146,23 +164,48 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
146 } 164 }
147 skb = skb2; 165 skb = skb2;
148 } 166 }
149 skb_push(skb, minheadroom); 167
150 if (brvcc->encaps == e_llc) 168 if (brvcc->encaps == e_llc) {
151 skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10); 169 if (brdev->payload == p_bridged) {
152 else 170 skb_push(skb, sizeof(llc_oui_pid_pad));
153 memset(skb->data, 0, 2); 171 skb_copy_to_linear_data(skb, llc_oui_pid_pad,
172 sizeof(llc_oui_pid_pad));
173 } else if (brdev->payload == p_routed) {
174 unsigned short prot = ntohs(skb->protocol);
175
176 skb_push(skb, sizeof(llc_oui_ipv4));
177 switch (prot) {
178 case ETH_P_IP:
179 skb_copy_to_linear_data(skb, llc_oui_ipv4,
180 sizeof(llc_oui_ipv4));
181 break;
182 case ETH_P_IPV6:
183 skb_copy_to_linear_data(skb, llc_oui_ipv6,
184 sizeof(llc_oui_ipv6));
185 break;
186 default:
187 dev_kfree_skb(skb);
188 return 0;
189 }
190 }
191 } else {
192 skb_push(skb, 2);
193 if (brdev->payload == p_bridged)
194 memset(skb->data, 0, 2);
195 }
154 skb_debug(skb); 196 skb_debug(skb);
155 197
156 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; 198 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
157 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); 199 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
158 if (!atm_may_send(atmvcc, skb->truesize)) { 200 if (!atm_may_send(atmvcc, skb->truesize)) {
159 /* we free this here for now, because we cannot know in a higher 201 /*
160 layer whether the skb point it supplied wasn't freed yet. 202 * We free this here for now, because we cannot know in a higher
161 now, it always is. 203 * layer whether the skb pointer it supplied wasn't freed yet.
162 */ 204 * Now, it always is.
205 */
163 dev_kfree_skb(skb); 206 dev_kfree_skb(skb);
164 return 0; 207 return 0;
165 } 208 }
166 atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); 209 atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
167 ATM_SKB(skb)->atm_options = atmvcc->atm_options; 210 ATM_SKB(skb)->atm_options = atmvcc->atm_options;
168 brdev->stats.tx_packets++; 211 brdev->stats.tx_packets++;
@@ -172,10 +215,9 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
172} 215}
173 216
174static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb, 217static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
175 struct br2684_dev *brdev) 218 struct br2684_dev *brdev)
176{ 219{
177 return list_empty(&brdev->brvccs) ? NULL : 220 return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
178 list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
179} 221}
180 222
181static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) 223static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -199,11 +241,10 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
199 /* 241 /*
200 * We should probably use netif_*_queue() here, but that 242 * We should probably use netif_*_queue() here, but that
201 * involves added complication. We need to walk before 243 * involves added complication. We need to walk before
202 * we can run 244 * we can run.
245 *
246 * Don't free here! this pointer might be no longer valid!
203 */ 247 */
204 /* don't free here! this pointer might be no longer valid!
205 dev_kfree_skb(skb);
206 */
207 brdev->stats.tx_errors++; 248 brdev->stats.tx_errors++;
208 brdev->stats.tx_fifo_errors++; 249 brdev->stats.tx_fifo_errors++;
209 } 250 }
@@ -217,12 +258,11 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev)
217 return &BRPRIV(dev)->stats; 258 return &BRPRIV(dev)->stats;
218} 259}
219 260
220
221/* 261/*
222 * We remember when the MAC gets set, so we don't override it later with 262 * We remember when the MAC gets set, so we don't override it later with
223 * the ESI of the ATM card of the first VC 263 * the ESI of the ATM card of the first VC
224 */ 264 */
225static int (*my_eth_mac_addr)(struct net_device *, void *); 265static int (*my_eth_mac_addr) (struct net_device *, void *);
226static int br2684_mac_addr(struct net_device *dev, void *p) 266static int br2684_mac_addr(struct net_device *dev, void *p)
227{ 267{
228 int err = my_eth_mac_addr(dev, p); 268 int err = my_eth_mac_addr(dev, p);
@@ -233,7 +273,7 @@ static int br2684_mac_addr(struct net_device *dev, void *p)
233 273
234#ifdef CONFIG_ATM_BR2684_IPFILTER 274#ifdef CONFIG_ATM_BR2684_IPFILTER
235/* this IOCTL is experimental. */ 275/* this IOCTL is experimental. */
236static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg) 276static int br2684_setfilt(struct atm_vcc *atmvcc, void __user * arg)
237{ 277{
238 struct br2684_vcc *brvcc; 278 struct br2684_vcc *brvcc;
239 struct br2684_filter_set fs; 279 struct br2684_filter_set fs;
@@ -243,13 +283,12 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
243 if (fs.ifspec.method != BR2684_FIND_BYNOTHING) { 283 if (fs.ifspec.method != BR2684_FIND_BYNOTHING) {
244 /* 284 /*
245 * This is really a per-vcc thing, but we can also search 285 * This is really a per-vcc thing, but we can also search
246 * by device 286 * by device.
247 */ 287 */
248 struct br2684_dev *brdev; 288 struct br2684_dev *brdev;
249 read_lock(&devs_lock); 289 read_lock(&devs_lock);
250 brdev = BRPRIV(br2684_find_dev(&fs.ifspec)); 290 brdev = BRPRIV(br2684_find_dev(&fs.ifspec));
251 if (brdev == NULL || list_empty(&brdev->brvccs) || 291 if (brdev == NULL || list_empty(&brdev->brvccs) || brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */
252 brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */
253 brvcc = NULL; 292 brvcc = NULL;
254 else 293 else
255 brvcc = list_entry_brvcc(brdev->brvccs.next); 294 brvcc = list_entry_brvcc(brdev->brvccs.next);
@@ -267,15 +306,16 @@ static inline int
267packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) 306packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
268{ 307{
269 if (brvcc->filter.netmask == 0) 308 if (brvcc->filter.netmask == 0)
270 return 0; /* no filter in place */ 309 return 0; /* no filter in place */
271 if (type == htons(ETH_P_IP) && 310 if (type == htons(ETH_P_IP) &&
272 (((struct iphdr *) (skb->data))->daddr & brvcc->filter. 311 (((struct iphdr *)(skb->data))->daddr & brvcc->filter.
273 netmask) == brvcc->filter.prefix) 312 netmask) == brvcc->filter.prefix)
274 return 0; 313 return 0;
275 if (type == htons(ETH_P_ARP)) 314 if (type == htons(ETH_P_ARP))
276 return 0; 315 return 0;
277 /* TODO: we should probably filter ARPs too.. don't want to have 316 /*
278 * them returning values that don't make sense, or is that ok? 317 * TODO: we should probably filter ARPs too.. don't want to have
318 * them returning values that don't make sense, or is that ok?
279 */ 319 */
280 return 1; /* drop */ 320 return 1; /* drop */
281} 321}
@@ -299,7 +339,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
299 struct br2684_vcc *brvcc = BR2684_VCC(atmvcc); 339 struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
300 struct net_device *net_dev = brvcc->device; 340 struct net_device *net_dev = brvcc->device;
301 struct br2684_dev *brdev = BRPRIV(net_dev); 341 struct br2684_dev *brdev = BRPRIV(net_dev);
302 int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN;
303 342
304 pr_debug("br2684_push\n"); 343 pr_debug("br2684_push\n");
305 344
@@ -320,35 +359,58 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
320 atm_return(atmvcc, skb->truesize); 359 atm_return(atmvcc, skb->truesize);
321 pr_debug("skb from brdev %p\n", brdev); 360 pr_debug("skb from brdev %p\n", brdev);
322 if (brvcc->encaps == e_llc) { 361 if (brvcc->encaps == e_llc) {
323 /* let us waste some time for checking the encapsulation. 362
324 Note, that only 7 char is checked so frames with a valid FCS 363 if (skb->len > 7 && skb->data[7] == 0x01)
325 are also accepted (but FCS is not checked of course) */ 364 __skb_trim(skb, skb->len - 4);
326 if (memcmp(skb->data, llc_oui_pid_pad, 7)) { 365
366 /* accept packets that have "ipv[46]" in the snap header */
367 if ((skb->len >= (sizeof(llc_oui_ipv4)))
368 &&
369 (memcmp
370 (skb->data, llc_oui_ipv4,
371 sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) {
372 if (memcmp
373 (skb->data + 6, ethertype_ipv6,
374 sizeof(ethertype_ipv6)) == 0)
375 skb->protocol = __constant_htons(ETH_P_IPV6);
376 else if (memcmp
377 (skb->data + 6, ethertype_ipv4,
378 sizeof(ethertype_ipv4)) == 0)
379 skb->protocol = __constant_htons(ETH_P_IP);
380 else {
381 brdev->stats.rx_errors++;
382 dev_kfree_skb(skb);
383 return;
384 }
385 skb_pull(skb, sizeof(llc_oui_ipv4));
386 skb_reset_network_header(skb);
387 skb->pkt_type = PACKET_HOST;
388 /*
389 * Let us waste some time for checking the encapsulation.
390 * Note, that only 7 char is checked so frames with a valid FCS
391 * are also accepted (but FCS is not checked of course).
392 */
393 } else if ((skb->len >= sizeof(llc_oui_pid_pad)) &&
394 (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
395 skb_pull(skb, sizeof(llc_oui_pid_pad));
396 skb->protocol = eth_type_trans(skb, net_dev);
397 } else {
327 brdev->stats.rx_errors++; 398 brdev->stats.rx_errors++;
328 dev_kfree_skb(skb); 399 dev_kfree_skb(skb);
329 return; 400 return;
330 } 401 }
331 402
332 /* Strip FCS if present */
333 if (skb->len > 7 && skb->data[7] == 0x01)
334 __skb_trim(skb, skb->len - 4);
335 } else { 403 } else {
336 plen = PADLEN + ETH_HLEN; /* pad, dstmac,srcmac, ethtype */
337 /* first 2 chars should be 0 */ 404 /* first 2 chars should be 0 */
338 if (*((u16 *) (skb->data)) != 0) { 405 if (*((u16 *) (skb->data)) != 0) {
339 brdev->stats.rx_errors++; 406 brdev->stats.rx_errors++;
340 dev_kfree_skb(skb); 407 dev_kfree_skb(skb);
341 return; 408 return;
342 } 409 }
343 } 410 skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */
344 if (skb->len < plen) { 411 skb->protocol = eth_type_trans(skb, net_dev);
345 brdev->stats.rx_errors++;
346 dev_kfree_skb(skb); /* dev_ not needed? */
347 return;
348 } 412 }
349 413
350 skb_pull(skb, plen - ETH_HLEN);
351 skb->protocol = eth_type_trans(skb, net_dev);
352#ifdef CONFIG_ATM_BR2684_IPFILTER 414#ifdef CONFIG_ATM_BR2684_IPFILTER
353 if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { 415 if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
354 brdev->stats.rx_dropped++; 416 brdev->stats.rx_dropped++;
@@ -372,11 +434,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
372 netif_rx(skb); 434 netif_rx(skb);
373} 435}
374 436
375static int br2684_regvcc(struct atm_vcc *atmvcc, void __user *arg) 437/*
438 * Assign a vcc to a dev
439 * Note: we do not have explicit unassign, but look at _push()
440 */
441static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
376{ 442{
377/* assign a vcc to a dev
378Note: we do not have explicit unassign, but look at _push()
379*/
380 int err; 443 int err;
381 struct br2684_vcc *brvcc; 444 struct br2684_vcc *brvcc;
382 struct sk_buff *skb; 445 struct sk_buff *skb;
@@ -395,7 +458,7 @@ Note: we do not have explicit unassign, but look at _push()
395 net_dev = br2684_find_dev(&be.ifspec); 458 net_dev = br2684_find_dev(&be.ifspec);
396 if (net_dev == NULL) { 459 if (net_dev == NULL) {
397 printk(KERN_ERR 460 printk(KERN_ERR
398 "br2684: tried to attach to non-existant device\n"); 461 "br2684: tried to attach to non-existant device\n");
399 err = -ENXIO; 462 err = -ENXIO;
400 goto error; 463 goto error;
401 } 464 }
@@ -411,13 +474,15 @@ Note: we do not have explicit unassign, but look at _push()
411 } 474 }
412 if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO || 475 if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO ||
413 be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps != 476 be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps !=
414 BR2684_ENCAPS_VC && be.encaps != BR2684_ENCAPS_LLC) || 477 BR2684_ENCAPS_VC
415 be.min_size != 0) { 478 && be.encaps !=
479 BR2684_ENCAPS_LLC)
480 || be.min_size != 0) {
416 err = -EINVAL; 481 err = -EINVAL;
417 goto error; 482 goto error;
418 } 483 }
419 pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, 484 pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc,
420 brvcc); 485 be.encaps, brvcc);
421 if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { 486 if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) {
422 unsigned char *esi = atmvcc->dev->esi; 487 unsigned char *esi = atmvcc->dev->esi;
423 if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) 488 if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5])
@@ -430,7 +495,7 @@ Note: we do not have explicit unassign, but look at _push()
430 brvcc->device = net_dev; 495 brvcc->device = net_dev;
431 brvcc->atmvcc = atmvcc; 496 brvcc->atmvcc = atmvcc;
432 atmvcc->user_back = brvcc; 497 atmvcc->user_back = brvcc;
433 brvcc->encaps = (enum br2684_encaps) be.encaps; 498 brvcc->encaps = (enum br2684_encaps)be.encaps;
434 brvcc->old_push = atmvcc->push; 499 brvcc->old_push = atmvcc->push;
435 barrier(); 500 barrier();
436 atmvcc->push = br2684_push; 501 atmvcc->push = br2684_push;
@@ -461,7 +526,7 @@ Note: we do not have explicit unassign, but look at _push()
461 } 526 }
462 __module_get(THIS_MODULE); 527 __module_get(THIS_MODULE);
463 return 0; 528 return 0;
464 error: 529 error:
465 write_unlock_irq(&devs_lock); 530 write_unlock_irq(&devs_lock);
466 kfree(brvcc); 531 kfree(brvcc);
467 return err; 532 return err;
@@ -482,25 +547,52 @@ static void br2684_setup(struct net_device *netdev)
482 INIT_LIST_HEAD(&brdev->brvccs); 547 INIT_LIST_HEAD(&brdev->brvccs);
483} 548}
484 549
485static int br2684_create(void __user *arg) 550static void br2684_setup_routed(struct net_device *netdev)
551{
552 struct br2684_dev *brdev = BRPRIV(netdev);
553 brdev->net_dev = netdev;
554
555 netdev->hard_header_len = 0;
556 my_eth_mac_addr = netdev->set_mac_address;
557 netdev->set_mac_address = br2684_mac_addr;
558 netdev->hard_start_xmit = br2684_start_xmit;
559 netdev->get_stats = br2684_get_stats;
560 netdev->addr_len = 0;
561 netdev->mtu = 1500;
562 netdev->type = ARPHRD_PPP;
563 netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
564 netdev->tx_queue_len = 100;
565 INIT_LIST_HEAD(&brdev->brvccs);
566}
567
568static int br2684_create(void __user * arg)
486{ 569{
487 int err; 570 int err;
488 struct net_device *netdev; 571 struct net_device *netdev;
489 struct br2684_dev *brdev; 572 struct br2684_dev *brdev;
490 struct atm_newif_br2684 ni; 573 struct atm_newif_br2684 ni;
574 enum br2684_payload payload;
491 575
492 pr_debug("br2684_create\n"); 576 pr_debug("br2684_create\n");
493 577
494 if (copy_from_user(&ni, arg, sizeof ni)) { 578 if (copy_from_user(&ni, arg, sizeof ni)) {
495 return -EFAULT; 579 return -EFAULT;
496 } 580 }
581
582 if (ni.media & BR2684_FLAG_ROUTED)
583 payload = p_routed;
584 else
585 payload = p_bridged;
586 ni.media &= 0xffff; /* strip flags */
587
497 if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) { 588 if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) {
498 return -EINVAL; 589 return -EINVAL;
499 } 590 }
500 591
501 netdev = alloc_netdev(sizeof(struct br2684_dev), 592 netdev = alloc_netdev(sizeof(struct br2684_dev),
502 ni.ifname[0] ? ni.ifname : "nas%d", 593 ni.ifname[0] ? ni.ifname : "nas%d",
503 br2684_setup); 594 (payload == p_routed) ?
595 br2684_setup_routed : br2684_setup);
504 if (!netdev) 596 if (!netdev)
505 return -ENOMEM; 597 return -ENOMEM;
506 598
@@ -516,6 +608,7 @@ static int br2684_create(void __user *arg)
516 } 608 }
517 609
518 write_lock_irq(&devs_lock); 610 write_lock_irq(&devs_lock);
611 brdev->payload = payload;
519 brdev->number = list_empty(&br2684_devs) ? 1 : 612 brdev->number = list_empty(&br2684_devs) ? 1 :
520 BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; 613 BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
521 list_add_tail(&brdev->br2684_devs, &br2684_devs); 614 list_add_tail(&brdev->br2684_devs, &br2684_devs);
@@ -528,16 +621,16 @@ static int br2684_create(void __user *arg)
528 * -ENOIOCTLCMD for any unrecognized ioctl 621 * -ENOIOCTLCMD for any unrecognized ioctl
529 */ 622 */
530static int br2684_ioctl(struct socket *sock, unsigned int cmd, 623static int br2684_ioctl(struct socket *sock, unsigned int cmd,
531 unsigned long arg) 624 unsigned long arg)
532{ 625{
533 struct atm_vcc *atmvcc = ATM_SD(sock); 626 struct atm_vcc *atmvcc = ATM_SD(sock);
534 void __user *argp = (void __user *)arg; 627 void __user *argp = (void __user *)arg;
628 atm_backend_t b;
535 629
536 int err; 630 int err;
537 switch(cmd) { 631 switch (cmd) {
538 case ATM_SETBACKEND: 632 case ATM_SETBACKEND:
539 case ATM_NEWBACKENDIF: { 633 case ATM_NEWBACKENDIF:
540 atm_backend_t b;
541 err = get_user(b, (atm_backend_t __user *) argp); 634 err = get_user(b, (atm_backend_t __user *) argp);
542 if (err) 635 if (err)
543 return -EFAULT; 636 return -EFAULT;
@@ -549,7 +642,6 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
549 return br2684_regvcc(atmvcc, argp); 642 return br2684_regvcc(atmvcc, argp);
550 else 643 else
551 return br2684_create(argp); 644 return br2684_create(argp);
552 }
553#ifdef CONFIG_ATM_BR2684_IPFILTER 645#ifdef CONFIG_ATM_BR2684_IPFILTER
554 case BR2684_SETFILT: 646 case BR2684_SETFILT:
555 if (atmvcc->push != br2684_push) 647 if (atmvcc->push != br2684_push)
@@ -557,6 +649,7 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
557 if (!capable(CAP_NET_ADMIN)) 649 if (!capable(CAP_NET_ADMIN))
558 return -EPERM; 650 return -EPERM;
559 err = br2684_setfilt(atmvcc, argp); 651 err = br2684_setfilt(atmvcc, argp);
652
560 return err; 653 return err;
561#endif /* CONFIG_ATM_BR2684_IPFILTER */ 654#endif /* CONFIG_ATM_BR2684_IPFILTER */
562 } 655 }
@@ -564,24 +657,25 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
564} 657}
565 658
566static struct atm_ioctl br2684_ioctl_ops = { 659static struct atm_ioctl br2684_ioctl_ops = {
567 .owner = THIS_MODULE, 660 .owner = THIS_MODULE,
568 .ioctl = br2684_ioctl, 661 .ioctl = br2684_ioctl,
569}; 662};
570 663
571
572#ifdef CONFIG_PROC_FS 664#ifdef CONFIG_PROC_FS
573static void *br2684_seq_start(struct seq_file *seq, loff_t *pos) 665static void *br2684_seq_start(struct seq_file *seq, loff_t * pos)
666 __acquires(devs_lock)
574{ 667{
575 read_lock(&devs_lock); 668 read_lock(&devs_lock);
576 return seq_list_start(&br2684_devs, *pos); 669 return seq_list_start(&br2684_devs, *pos);
577} 670}
578 671
579static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos) 672static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t * pos)
580{ 673{
581 return seq_list_next(v, &br2684_devs, pos); 674 return seq_list_next(v, &br2684_devs, pos);
582} 675}
583 676
584static void br2684_seq_stop(struct seq_file *seq, void *v) 677static void br2684_seq_stop(struct seq_file *seq, void *v)
678 __releases(devs_lock)
585{ 679{
586 read_unlock(&devs_lock); 680 read_unlock(&devs_lock);
587} 681}
@@ -589,7 +683,7 @@ static void br2684_seq_stop(struct seq_file *seq, void *v)
589static int br2684_seq_show(struct seq_file *seq, void *v) 683static int br2684_seq_show(struct seq_file *seq, void *v)
590{ 684{
591 const struct br2684_dev *brdev = list_entry(v, struct br2684_dev, 685 const struct br2684_dev *brdev = list_entry(v, struct br2684_dev,
592 br2684_devs); 686 br2684_devs);
593 const struct net_device *net_dev = brdev->net_dev; 687 const struct net_device *net_dev = brdev->net_dev;
594 const struct br2684_vcc *brvcc; 688 const struct br2684_vcc *brvcc;
595 DECLARE_MAC_BUF(mac); 689 DECLARE_MAC_BUF(mac);
@@ -601,21 +695,19 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
601 brdev->mac_was_set ? "set" : "auto"); 695 brdev->mac_was_set ? "set" : "auto");
602 696
603 list_for_each_entry(brvcc, &brdev->brvccs, brvccs) { 697 list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
604 seq_printf(seq, " vcc %d.%d.%d: encaps=%s" 698 seq_printf(seq, " vcc %d.%d.%d: encaps=%s payload=%s"
605 ", failed copies %u/%u" 699 ", failed copies %u/%u"
606 "\n", brvcc->atmvcc->dev->number, 700 "\n", brvcc->atmvcc->dev->number,
607 brvcc->atmvcc->vpi, brvcc->atmvcc->vci, 701 brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
608 (brvcc->encaps == e_llc) ? "LLC" : "VC" 702 (brvcc->encaps == e_llc) ? "LLC" : "VC",
609 , brvcc->copies_failed 703 (brdev->payload == p_bridged) ? "bridged" : "routed",
610 , brvcc->copies_needed 704 brvcc->copies_failed, brvcc->copies_needed);
611 );
612#ifdef CONFIG_ATM_BR2684_IPFILTER 705#ifdef CONFIG_ATM_BR2684_IPFILTER
613#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte] 706#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte]
614#define bs(var) b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3) 707#define bs(var) b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
615 if (brvcc->filter.netmask != 0) 708 if (brvcc->filter.netmask != 0)
616 seq_printf(seq, " filter=%d.%d.%d.%d/" 709 seq_printf(seq, " filter=%d.%d.%d.%d/"
617 "%d.%d.%d.%d\n", 710 "%d.%d.%d.%d\n", bs(prefix), bs(netmask));
618 bs(prefix), bs(netmask));
619#undef bs 711#undef bs
620#undef b1 712#undef b1
621#endif /* CONFIG_ATM_BR2684_IPFILTER */ 713#endif /* CONFIG_ATM_BR2684_IPFILTER */
@@ -625,9 +717,9 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
625 717
626static const struct seq_operations br2684_seq_ops = { 718static const struct seq_operations br2684_seq_ops = {
627 .start = br2684_seq_start, 719 .start = br2684_seq_start,
628 .next = br2684_seq_next, 720 .next = br2684_seq_next,
629 .stop = br2684_seq_stop, 721 .stop = br2684_seq_stop,
630 .show = br2684_seq_show, 722 .show = br2684_seq_show,
631}; 723};
632 724
633static int br2684_proc_open(struct inode *inode, struct file *file) 725static int br2684_proc_open(struct inode *inode, struct file *file)
@@ -636,15 +728,15 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
636} 728}
637 729
638static const struct file_operations br2684_proc_ops = { 730static const struct file_operations br2684_proc_ops = {
639 .owner = THIS_MODULE, 731 .owner = THIS_MODULE,
640 .open = br2684_proc_open, 732 .open = br2684_proc_open,
641 .read = seq_read, 733 .read = seq_read,
642 .llseek = seq_lseek, 734 .llseek = seq_lseek,
643 .release = seq_release, 735 .release = seq_release,
644}; 736};
645 737
646extern struct proc_dir_entry *atm_proc_root; /* from proc.c */ 738extern struct proc_dir_entry *atm_proc_root; /* from proc.c */
647#endif 739#endif /* CONFIG_PROC_FS */
648 740
649static int __init br2684_init(void) 741static int __init br2684_init(void)
650{ 742{
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 741742f00797..86b885ec1cbd 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -285,7 +285,7 @@ static int clip_constructor(struct neighbour *neigh)
285 struct neigh_parms *parms; 285 struct neigh_parms *parms;
286 286
287 pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); 287 pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry);
288 neigh->type = inet_addr_type(entry->ip); 288 neigh->type = inet_addr_type(&init_net, entry->ip);
289 if (neigh->type != RTN_UNICAST) 289 if (neigh->type != RTN_UNICAST)
290 return -EINVAL; 290 return -EINVAL;
291 291
@@ -534,7 +534,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
534 unlink_clip_vcc(clip_vcc); 534 unlink_clip_vcc(clip_vcc);
535 return 0; 535 return 0;
536 } 536 }
537 error = ip_route_output_key(&rt, &fl); 537 error = ip_route_output_key(&init_net, &rt, &fl);
538 if (error) 538 if (error)
539 return error; 539 return error;
540 neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); 540 neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
@@ -903,6 +903,8 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
903 903
904static void *clip_seq_start(struct seq_file *seq, loff_t * pos) 904static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
905{ 905{
906 struct clip_seq_state *state = seq->private;
907 state->ns.neigh_sub_iter = clip_seq_sub_iter;
906 return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY); 908 return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
907} 909}
908 910
@@ -932,36 +934,15 @@ static const struct seq_operations arp_seq_ops = {
932 934
933static int arp_seq_open(struct inode *inode, struct file *file) 935static int arp_seq_open(struct inode *inode, struct file *file)
934{ 936{
935 struct clip_seq_state *state; 937 return seq_open_net(inode, file, &arp_seq_ops,
936 struct seq_file *seq; 938 sizeof(struct clip_seq_state));
937 int rc = -EAGAIN;
938
939 state = kzalloc(sizeof(*state), GFP_KERNEL);
940 if (!state) {
941 rc = -ENOMEM;
942 goto out_kfree;
943 }
944 state->ns.neigh_sub_iter = clip_seq_sub_iter;
945
946 rc = seq_open(file, &arp_seq_ops);
947 if (rc)
948 goto out_kfree;
949
950 seq = file->private_data;
951 seq->private = state;
952out:
953 return rc;
954
955out_kfree:
956 kfree(state);
957 goto out;
958} 939}
959 940
960static const struct file_operations arp_seq_fops = { 941static const struct file_operations arp_seq_fops = {
961 .open = arp_seq_open, 942 .open = arp_seq_open,
962 .read = seq_read, 943 .read = seq_read,
963 .llseek = seq_lseek, 944 .llseek = seq_lseek,
964 .release = seq_release_private, 945 .release = seq_release_net,
965 .owner = THIS_MODULE 946 .owner = THIS_MODULE
966}; 947};
967#endif 948#endif
diff --git a/net/atm/common.c b/net/atm/common.c
index e166d9e0ffd9..c865517ba449 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -113,7 +113,7 @@ static void vcc_write_space(struct sock *sk)
113 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 113 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
114 wake_up_interruptible(sk->sk_sleep); 114 wake_up_interruptible(sk->sk_sleep);
115 115
116 sk_wake_async(sk, 2, POLL_OUT); 116 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
117 } 117 }
118 118
119 read_unlock(&sk->sk_callback_lock); 119 read_unlock(&sk->sk_callback_lock);
@@ -133,7 +133,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
133 sock->sk = NULL; 133 sock->sk = NULL;
134 if (sock->type == SOCK_STREAM) 134 if (sock->type == SOCK_STREAM)
135 return -EINVAL; 135 return -EINVAL;
136 sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); 136 sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto);
137 if (!sk) 137 if (!sk)
138 return -ENOMEM; 138 return -ENOMEM;
139 sock_init_data(sock, sk); 139 sock_init_data(sock, sk);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 7eb1b21a0e94..1a8c4c6c0cd0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -176,7 +176,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
176static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) 176static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
177{ 177{
178 struct trh_hdr *trh; 178 struct trh_hdr *trh;
179 int riflen, num_rdsc; 179 unsigned int riflen, num_rdsc;
180 180
181 trh = (struct trh_hdr *)packet; 181 trh = (struct trh_hdr *)packet;
182 if (trh->daddr[0] & (uint8_t) 0x80) 182 if (trh->daddr[0] & (uint8_t) 0x80)
@@ -1789,9 +1789,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
1789 } 1789 }
1790 memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); 1790 memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
1791 INIT_HLIST_NODE(&to_return->next); 1791 INIT_HLIST_NODE(&to_return->next);
1792 init_timer(&to_return->timer); 1792 setup_timer(&to_return->timer, lec_arp_expire_arp,
1793 to_return->timer.function = lec_arp_expire_arp; 1793 (unsigned long)to_return);
1794 to_return->timer.data = (unsigned long)to_return;
1795 to_return->last_used = jiffies; 1794 to_return->last_used = jiffies;
1796 to_return->priv = priv; 1795 to_return->priv = priv;
1797 skb_queue_head_init(&to_return->tx_wait); 1796 skb_queue_head_init(&to_return->tx_wait);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 2086396de177..9c7f712fc7e9 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -542,6 +542,13 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
542 if (eth->h_proto != htons(ETH_P_IP)) 542 if (eth->h_proto != htons(ETH_P_IP))
543 goto non_ip; /* Multi-Protocol Over ATM :-) */ 543 goto non_ip; /* Multi-Protocol Over ATM :-) */
544 544
545 /* Weed out funny packets (e.g., AF_PACKET or raw). */
546 if (skb->len < ETH_HLEN + sizeof(struct iphdr))
547 goto non_ip;
548 skb_set_network_header(skb, ETH_HLEN);
549 if (skb->len < ETH_HLEN + ip_hdr(skb)->ihl * 4 || ip_hdr(skb)->ihl < 5)
550 goto non_ip;
551
545 while (i < mpc->number_of_mps_macs) { 552 while (i < mpc->number_of_mps_macs) {
546 if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN))) 553 if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN)))
547 if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */ 554 if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 5d9d5ffba145..49125110bb8b 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -142,6 +142,7 @@ static int vcc_seq_release(struct inode *inode, struct file *file)
142} 142}
143 143
144static void *vcc_seq_start(struct seq_file *seq, loff_t *pos) 144static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
145 __acquires(vcc_sklist_lock)
145{ 146{
146 struct vcc_state *state = seq->private; 147 struct vcc_state *state = seq->private;
147 loff_t left = *pos; 148 loff_t left = *pos;
@@ -152,6 +153,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
152} 153}
153 154
154static void vcc_seq_stop(struct seq_file *seq, void *v) 155static void vcc_seq_stop(struct seq_file *seq, void *v)
156 __releases(vcc_sklist_lock)
155{ 157{
156 read_unlock(&vcc_sklist_lock); 158 read_unlock(&vcc_sklist_lock);
157} 159}
@@ -476,7 +478,7 @@ static void atm_proc_dirs_remove(void)
476 if (e->dirent) 478 if (e->dirent)
477 remove_proc_entry(e->name, atm_proc_root); 479 remove_proc_entry(e->name, atm_proc_root);
478 } 480 }
479 remove_proc_entry("atm", init_net.proc_net); 481 proc_net_remove(&init_net, "atm");
480} 482}
481 483
482int __init atm_proc_init(void) 484int __init atm_proc_init(void)
@@ -484,7 +486,7 @@ int __init atm_proc_init(void)
484 static struct atm_proc_entry *e; 486 static struct atm_proc_entry *e;
485 int ret; 487 int ret;
486 488
487 atm_proc_root = proc_mkdir("atm", init_net.proc_net); 489 atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
488 if (!atm_proc_root) 490 if (!atm_proc_root)
489 goto err_out; 491 goto err_out;
490 for (e = atm_proc_ents; e->name; e++) { 492 for (e = atm_proc_ents; e->name; e++) {
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 993e5c75e909..8fc64e3150a2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -87,10 +87,22 @@ static void ax25_kill_by_device(struct net_device *dev)
87 return; 87 return;
88 88
89 spin_lock_bh(&ax25_list_lock); 89 spin_lock_bh(&ax25_list_lock);
90again:
90 ax25_for_each(s, node, &ax25_list) { 91 ax25_for_each(s, node, &ax25_list) {
91 if (s->ax25_dev == ax25_dev) { 92 if (s->ax25_dev == ax25_dev) {
92 s->ax25_dev = NULL; 93 s->ax25_dev = NULL;
94 spin_unlock_bh(&ax25_list_lock);
93 ax25_disconnect(s, ENETUNREACH); 95 ax25_disconnect(s, ENETUNREACH);
96 spin_lock_bh(&ax25_list_lock);
97
98 /* The entry could have been deleted from the
99 * list meanwhile and thus the next pointer is
100 * no longer valid. Play it safe and restart
101 * the scan. Forward progress is ensured
102 * because we set s->ax25_dev to NULL and we
103 * are never passed a NULL 'dev' argument.
104 */
105 goto again;
94 } 106 }
95 } 107 }
96 spin_unlock_bh(&ax25_list_lock); 108 spin_unlock_bh(&ax25_list_lock);
@@ -318,10 +330,9 @@ void ax25_destroy_socket(ax25_cb *ax25)
318 if (atomic_read(&ax25->sk->sk_wmem_alloc) || 330 if (atomic_read(&ax25->sk->sk_wmem_alloc) ||
319 atomic_read(&ax25->sk->sk_rmem_alloc)) { 331 atomic_read(&ax25->sk->sk_rmem_alloc)) {
320 /* Defer: outstanding buffers */ 332 /* Defer: outstanding buffers */
321 init_timer(&ax25->dtimer); 333 setup_timer(&ax25->dtimer, ax25_destroy_timer,
334 (unsigned long)ax25);
322 ax25->dtimer.expires = jiffies + 2 * HZ; 335 ax25->dtimer.expires = jiffies + 2 * HZ;
323 ax25->dtimer.function = ax25_destroy_timer;
324 ax25->dtimer.data = (unsigned long)ax25;
325 add_timer(&ax25->dtimer); 336 add_timer(&ax25->dtimer);
326 } else { 337 } else {
327 struct sock *sk=ax25->sk; 338 struct sock *sk=ax25->sk;
@@ -559,7 +570,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
559 res = -EINVAL; 570 res = -EINVAL;
560 break; 571 break;
561 } 572 }
562 ax25->rtt = (opt * HZ) / 2; 573 ax25->rtt = (opt * HZ) >> 1;
563 ax25->t1 = opt * HZ; 574 ax25->t1 = opt * HZ;
564 break; 575 break;
565 576
@@ -836,7 +847,8 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol)
836 return -ESOCKTNOSUPPORT; 847 return -ESOCKTNOSUPPORT;
837 } 848 }
838 849
839 if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) 850 sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto);
851 if (sk == NULL)
840 return -ENOMEM; 852 return -ENOMEM;
841 853
842 ax25 = sk->sk_protinfo = ax25_create_cb(); 854 ax25 = sk->sk_protinfo = ax25_create_cb();
@@ -861,7 +873,8 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
861 struct sock *sk; 873 struct sock *sk;
862 ax25_cb *ax25, *oax25; 874 ax25_cb *ax25, *oax25;
863 875
864 if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) 876 sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot);
877 if (sk == NULL)
865 return NULL; 878 return NULL;
866 879
867 if ((ax25 = ax25_create_cb()) == NULL) { 880 if ((ax25 = ax25_create_cb()) == NULL) {
@@ -1024,16 +1037,13 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1024 int err = 0; 1037 int err = 0;
1025 1038
1026 if (addr_len != sizeof(struct sockaddr_ax25) && 1039 if (addr_len != sizeof(struct sockaddr_ax25) &&
1027 addr_len != sizeof(struct full_sockaddr_ax25)) { 1040 addr_len != sizeof(struct full_sockaddr_ax25))
1028 /* support for old structure may go away some time */ 1041 /* support for old structure may go away some time
1042 * ax25_bind(): uses old (6 digipeater) socket structure.
1043 */
1029 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || 1044 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
1030 (addr_len > sizeof(struct full_sockaddr_ax25))) { 1045 (addr_len > sizeof(struct full_sockaddr_ax25)))
1031 return -EINVAL; 1046 return -EINVAL;
1032 }
1033
1034 printk(KERN_WARNING "ax25_bind(): %s uses old (6 digipeater) socket structure.\n",
1035 current->comm);
1036 }
1037 1047
1038 if (addr->fsa_ax25.sax25_family != AF_AX25) 1048 if (addr->fsa_ax25.sax25_family != AF_AX25)
1039 return -EINVAL; 1049 return -EINVAL;
@@ -1107,21 +1117,19 @@ static int __must_check ax25_connect(struct socket *sock,
1107 * some sanity checks. code further down depends on this 1117 * some sanity checks. code further down depends on this
1108 */ 1118 */
1109 1119
1110 if (addr_len == sizeof(struct sockaddr_ax25)) { 1120 if (addr_len == sizeof(struct sockaddr_ax25))
1111 /* support for this will go away in early 2.5.x */ 1121 /* support for this will go away in early 2.5.x
1112 printk(KERN_WARNING "ax25_connect(): %s uses obsolete socket structure\n", 1122 * ax25_connect(): uses obsolete socket structure
1113 current->comm); 1123 */
1114 } 1124 ;
1115 else if (addr_len != sizeof(struct full_sockaddr_ax25)) { 1125 else if (addr_len != sizeof(struct full_sockaddr_ax25))
1116 /* support for old structure may go away some time */ 1126 /* support for old structure may go away some time
1127 * ax25_connect(): uses old (6 digipeater) socket structure.
1128 */
1117 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || 1129 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
1118 (addr_len > sizeof(struct full_sockaddr_ax25))) { 1130 (addr_len > sizeof(struct full_sockaddr_ax25)))
1119 return -EINVAL; 1131 return -EINVAL;
1120 }
1121 1132
1122 printk(KERN_WARNING "ax25_connect(): %s uses old (6 digipeater) socket structure.\n",
1123 current->comm);
1124 }
1125 1133
1126 if (fsa->fsa_ax25.sax25_family != AF_AX25) 1134 if (fsa->fsa_ax25.sax25_family != AF_AX25)
1127 return -EINVAL; 1135 return -EINVAL;
@@ -1465,21 +1473,20 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1465 goto out; 1473 goto out;
1466 } 1474 }
1467 1475
1468 if (addr_len == sizeof(struct sockaddr_ax25)) { 1476 if (addr_len == sizeof(struct sockaddr_ax25))
1469 printk(KERN_WARNING "ax25_sendmsg(): %s uses obsolete socket structure\n", 1477 /* ax25_sendmsg(): uses obsolete socket structure */
1470 current->comm); 1478 ;
1471 } 1479 else if (addr_len != sizeof(struct full_sockaddr_ax25))
1472 else if (addr_len != sizeof(struct full_sockaddr_ax25)) { 1480 /* support for old structure may go away some time
1473 /* support for old structure may go away some time */ 1481 * ax25_sendmsg(): uses old (6 digipeater)
1482 * socket structure.
1483 */
1474 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || 1484 if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
1475 (addr_len > sizeof(struct full_sockaddr_ax25))) { 1485 (addr_len > sizeof(struct full_sockaddr_ax25))) {
1476 err = -EINVAL; 1486 err = -EINVAL;
1477 goto out; 1487 goto out;
1478 } 1488 }
1479 1489
1480 printk(KERN_WARNING "ax25_sendmsg(): %s uses old (6 digipeater) socket structure.\n",
1481 current->comm);
1482 }
1483 1490
1484 if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) { 1491 if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) {
1485 int ct = 0; 1492 int ct = 0;
@@ -1853,6 +1860,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1853#ifdef CONFIG_PROC_FS 1860#ifdef CONFIG_PROC_FS
1854 1861
1855static void *ax25_info_start(struct seq_file *seq, loff_t *pos) 1862static void *ax25_info_start(struct seq_file *seq, loff_t *pos)
1863 __acquires(ax25_list_lock)
1856{ 1864{
1857 struct ax25_cb *ax25; 1865 struct ax25_cb *ax25;
1858 struct hlist_node *node; 1866 struct hlist_node *node;
@@ -1876,6 +1884,7 @@ static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos)
1876} 1884}
1877 1885
1878static void ax25_info_stop(struct seq_file *seq, void *v) 1886static void ax25_info_stop(struct seq_file *seq, void *v)
1887 __releases(ax25_list_lock)
1879{ 1888{
1880 spin_unlock_bh(&ax25_list_lock); 1889 spin_unlock_bh(&ax25_list_lock);
1881} 1890}
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 4f44185955c7..c4e3b025d21c 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -130,7 +130,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
130 */ 130 */
131 if (sk != NULL) { 131 if (sk != NULL) {
132 if (atomic_read(&sk->sk_rmem_alloc) < 132 if (atomic_read(&sk->sk_rmem_alloc) <
133 (sk->sk_rcvbuf / 2) && 133 (sk->sk_rcvbuf >> 1) &&
134 (ax25->condition & AX25_COND_OWN_RX_BUSY)) { 134 (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
135 ax25->condition &= ~AX25_COND_OWN_RX_BUSY; 135 ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
136 ax25->condition &= ~AX25_COND_ACK_PENDING; 136 ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 3b7d1720c2ee..d1be080dcb25 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -124,7 +124,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
124 } 124 }
125 125
126 skb_pull(skb, 1); /* Remove PID */ 126 skb_pull(skb, 1); /* Remove PID */
127 skb_reset_mac_header(skb); 127 skb->mac_header = skb->network_header;
128 skb_reset_network_header(skb); 128 skb_reset_network_header(skb);
129 skb->dev = ax25->ax25_dev->dev; 129 skb->dev = ax25->ax25_dev->dev;
130 skb->pkt_type = PACKET_HOST; 130 skb->pkt_type = PACKET_HOST;
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 9ecf6f1df863..38c7f3087ec3 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -249,6 +249,7 @@ int ax25_rt_ioctl(unsigned int cmd, void __user *arg)
249#ifdef CONFIG_PROC_FS 249#ifdef CONFIG_PROC_FS
250 250
251static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos) 251static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos)
252 __acquires(ax25_route_lock)
252{ 253{
253 struct ax25_route *ax25_rt; 254 struct ax25_route *ax25_rt;
254 int i = 1; 255 int i = 1;
@@ -274,6 +275,7 @@ static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
274} 275}
275 276
276static void ax25_rt_seq_stop(struct seq_file *seq, void *v) 277static void ax25_rt_seq_stop(struct seq_file *seq, void *v)
278 __releases(ax25_route_lock)
277{ 279{
278 read_unlock(&ax25_route_lock); 280 read_unlock(&ax25_route_lock);
279} 281}
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index f2f6918ac9bb..96e4b9273250 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -32,7 +32,7 @@
32 32
33void ax25_std_heartbeat_expiry(ax25_cb *ax25) 33void ax25_std_heartbeat_expiry(ax25_cb *ax25)
34{ 34{
35 struct sock *sk=ax25->sk; 35 struct sock *sk = ax25->sk;
36 36
37 if (sk) 37 if (sk)
38 bh_lock_sock(sk); 38 bh_lock_sock(sk);
@@ -62,7 +62,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
62 */ 62 */
63 if (sk != NULL) { 63 if (sk != NULL) {
64 if (atomic_read(&sk->sk_rmem_alloc) < 64 if (atomic_read(&sk->sk_rmem_alloc) <
65 (sk->sk_rcvbuf / 2) && 65 (sk->sk_rcvbuf >> 1) &&
66 (ax25->condition & AX25_COND_OWN_RX_BUSY)) { 66 (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
67 ax25->condition &= ~AX25_COND_OWN_RX_BUSY; 67 ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
68 ax25->condition &= ~AX25_COND_ACK_PENDING; 68 ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 5fe9b2a6697d..d8f215733175 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -279,6 +279,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
279 ax25_link_failed(ax25, reason); 279 ax25_link_failed(ax25, reason);
280 280
281 if (ax25->sk != NULL) { 281 if (ax25->sk != NULL) {
282 local_bh_disable();
282 bh_lock_sock(ax25->sk); 283 bh_lock_sock(ax25->sk);
283 ax25->sk->sk_state = TCP_CLOSE; 284 ax25->sk->sk_state = TCP_CLOSE;
284 ax25->sk->sk_err = reason; 285 ax25->sk->sk_err = reason;
@@ -288,5 +289,6 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
288 sock_set_flag(ax25->sk, SOCK_DEAD); 289 sock_set_flag(ax25->sk, SOCK_DEAD);
289 } 290 }
290 bh_unlock_sock(ax25->sk); 291 bh_unlock_sock(ax25->sk);
292 local_bh_enable();
291 } 293 }
292} 294}
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index ce0b13d44385..5f4eb73fb9d3 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -43,10 +43,10 @@
43 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. 43 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
44 */ 44 */
45 45
46HLIST_HEAD(ax25_uid_list); 46static HLIST_HEAD(ax25_uid_list);
47static DEFINE_RWLOCK(ax25_uid_lock); 47static DEFINE_RWLOCK(ax25_uid_lock);
48 48
49int ax25_uid_policy = 0; 49int ax25_uid_policy;
50 50
51EXPORT_SYMBOL(ax25_uid_policy); 51EXPORT_SYMBOL(ax25_uid_policy);
52 52
@@ -144,6 +144,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
144#ifdef CONFIG_PROC_FS 144#ifdef CONFIG_PROC_FS
145 145
146static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) 146static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
147 __acquires(ax25_uid_lock)
147{ 148{
148 struct ax25_uid_assoc *pt; 149 struct ax25_uid_assoc *pt;
149 struct hlist_node *node; 150 struct hlist_node *node;
@@ -167,6 +168,7 @@ static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
167} 168}
168 169
169static void ax25_uid_seq_stop(struct seq_file *seq, void *v) 170static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
171 __releases(ax25_uid_lock)
170{ 172{
171 read_unlock(&ax25_uid_lock); 173 read_unlock(&ax25_uid_lock);
172} 174}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 443a83676638..f597987b2424 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -31,25 +31,11 @@ static struct ctl_table_header *ax25_table_header;
31static ctl_table *ax25_table; 31static ctl_table *ax25_table;
32static int ax25_table_size; 32static int ax25_table_size;
33 33
34static ctl_table ax25_dir_table[] = { 34static struct ctl_path ax25_path[] = {
35 { 35 { .procname = "net", .ctl_name = CTL_NET, },
36 .ctl_name = NET_AX25, 36 { .procname = "ax25", .ctl_name = NET_AX25, },
37 .procname = "ax25", 37 { }
38 .mode = 0555,
39 },
40 { .ctl_name = 0 }
41};
42
43static ctl_table ax25_root_table[] = {
44 {
45 .ctl_name = CTL_NET,
46 .procname = "net",
47 .mode = 0555,
48 .child = ax25_dir_table
49 },
50 { .ctl_name = 0 }
51}; 38};
52
53static const ctl_table ax25_param_table[] = { 39static const ctl_table ax25_param_table[] = {
54 { 40 {
55 .ctl_name = NET_AX25_IP_DEFAULT_MODE, 41 .ctl_name = NET_AX25_IP_DEFAULT_MODE,
@@ -243,9 +229,7 @@ void ax25_register_sysctl(void)
243 } 229 }
244 spin_unlock_bh(&ax25_dev_lock); 230 spin_unlock_bh(&ax25_dev_lock);
245 231
246 ax25_dir_table[0].child = ax25_table; 232 ax25_table_header = register_sysctl_paths(ax25_path, ax25_table);
247
248 ax25_table_header = register_sysctl_table(ax25_root_table);
249} 233}
250 234
251void ax25_unregister_sysctl(void) 235void ax25_unregister_sysctl(void)
@@ -253,7 +237,6 @@ void ax25_unregister_sysctl(void)
253 ctl_table *p; 237 ctl_table *p;
254 unregister_sysctl_table(ax25_table_header); 238 unregister_sysctl_table(ax25_table_header);
255 239
256 ax25_dir_table[0].child = NULL;
257 for (p = ax25_table; p->ctl_name; p++) 240 for (p = ax25_table; p->ctl_name; p++)
258 kfree(p->child); 241 kfree(p->child);
259 kfree(ax25_table); 242 kfree(ax25_table);
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index f718965f296c..81065e548a1f 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -94,7 +94,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
94 return err; 94 return err;
95 95
96 if (nsock->sk->sk_state != BT_CONNECTED) { 96 if (nsock->sk->sk_state != BT_CONNECTED) {
97 fput(nsock->file); 97 sockfd_put(nsock);
98 return -EBADFD; 98 return -EBADFD;
99 } 99 }
100 100
@@ -103,7 +103,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
103 if (copy_to_user(argp, &ca, sizeof(ca))) 103 if (copy_to_user(argp, &ca, sizeof(ca)))
104 err = -EFAULT; 104 err = -EFAULT;
105 } else 105 } else
106 fput(nsock->file); 106 sockfd_put(nsock);
107 107
108 return err; 108 return err;
109 109
@@ -213,7 +213,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol)
213 if (sock->type != SOCK_RAW) 213 if (sock->type != SOCK_RAW)
214 return -ESOCKTNOSUPPORT; 214 return -ESOCKTNOSUPPORT;
215 215
216 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); 216 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto);
217 if (!sk) 217 if (!sk)
218 return -ENOMEM; 218 return -ENOMEM;
219 219
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index cf700c20d11e..8c7f7bc4e0ba 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -88,7 +88,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
88 return err; 88 return err;
89 89
90 if (nsock->sk->sk_state != BT_CONNECTED) { 90 if (nsock->sk->sk_state != BT_CONNECTED) {
91 fput(nsock->file); 91 sockfd_put(nsock);
92 return -EBADFD; 92 return -EBADFD;
93 } 93 }
94 94
@@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
97 if (copy_to_user(argp, &ca, sizeof(ca))) 97 if (copy_to_user(argp, &ca, sizeof(ca)))
98 err = -EFAULT; 98 err = -EFAULT;
99 } else 99 } else
100 fput(nsock->file); 100 sockfd_put(nsock);
101 101
102 return err; 102 return err;
103 103
@@ -204,7 +204,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol)
204 if (sock->type != SOCK_RAW) 204 if (sock->type != SOCK_RAW)
205 return -ESOCKTNOSUPPORT; 205 return -ESOCKTNOSUPPORT;
206 206
207 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); 207 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto);
208 if (!sk) 208 if (!sk)
209 return -ENOMEM; 209 return -ENOMEM;
210 210
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 9483320f6dad..5fc7be206f62 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -208,13 +208,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
208 208
209 skb_queue_head_init(&conn->data_q); 209 skb_queue_head_init(&conn->data_q);
210 210
211 init_timer(&conn->disc_timer); 211 setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn);
212 conn->disc_timer.function = hci_conn_timeout; 212 setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn);
213 conn->disc_timer.data = (unsigned long) conn;
214
215 init_timer(&conn->idle_timer);
216 conn->idle_timer.function = hci_conn_idle;
217 conn->idle_timer.data = (unsigned long) conn;
218 213
219 atomic_set(&conn->refcnt, 0); 214 atomic_set(&conn->refcnt, 0);
220 215
@@ -259,22 +254,14 @@ int hci_conn_del(struct hci_conn *conn)
259 } 254 }
260 255
261 tasklet_disable(&hdev->tx_task); 256 tasklet_disable(&hdev->tx_task);
262
263 hci_conn_del_sysfs(conn);
264
265 hci_conn_hash_del(hdev, conn); 257 hci_conn_hash_del(hdev, conn);
266 if (hdev->notify) 258 if (hdev->notify)
267 hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); 259 hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
268
269 tasklet_enable(&hdev->tx_task); 260 tasklet_enable(&hdev->tx_task);
270
271 skb_queue_purge(&conn->data_q); 261 skb_queue_purge(&conn->data_q);
272 262 hci_conn_del_sysfs(conn);
273 hci_dev_put(hdev); 263 hci_dev_put(hdev);
274 264
275 /* will free via device release */
276 put_device(&conn->dev);
277
278 return 0; 265 return 0;
279} 266}
280 267
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 8825102c517c..14991323c273 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -645,7 +645,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol)
645 645
646 sock->ops = &hci_sock_ops; 646 sock->ops = &hci_sock_ops;
647 647
648 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); 648 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto);
649 if (!sk) 649 if (!sk)
650 return -ENOMEM; 650 return -ENOMEM;
651 651
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index cef1e3e1881c..e13cf5ef144c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -12,6 +12,8 @@
12#undef BT_DBG 12#undef BT_DBG
13#define BT_DBG(D...) 13#define BT_DBG(D...)
14#endif 14#endif
15static struct workqueue_struct *btaddconn;
16static struct workqueue_struct *btdelconn;
15 17
16static inline char *typetostr(int type) 18static inline char *typetostr(int type)
17{ 19{
@@ -279,6 +281,8 @@ static void add_conn(struct work_struct *work)
279 struct hci_conn *conn = container_of(work, struct hci_conn, work); 281 struct hci_conn *conn = container_of(work, struct hci_conn, work);
280 int i; 282 int i;
281 283
284 flush_workqueue(btdelconn);
285
282 if (device_add(&conn->dev) < 0) { 286 if (device_add(&conn->dev) < 0) {
283 BT_ERR("Failed to register connection device"); 287 BT_ERR("Failed to register connection device");
284 return; 288 return;
@@ -313,13 +317,31 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
313 317
314 INIT_WORK(&conn->work, add_conn); 318 INIT_WORK(&conn->work, add_conn);
315 319
316 schedule_work(&conn->work); 320 queue_work(btaddconn, &conn->work);
321}
322
323static int __match_tty(struct device *dev, void *data)
324{
325 /* The rfcomm tty device will possibly retain even when conn
326 * is down, and sysfs doesn't support move zombie device,
327 * so we should move the device before conn device is destroyed.
328 * Due to the only child device of hci_conn dev is rfcomm
329 * tty_dev, here just return 1
330 */
331 return 1;
317} 332}
318 333
319static void del_conn(struct work_struct *work) 334static void del_conn(struct work_struct *work)
320{ 335{
336 struct device *dev;
321 struct hci_conn *conn = container_of(work, struct hci_conn, work); 337 struct hci_conn *conn = container_of(work, struct hci_conn, work);
338
339 while (dev = device_find_child(&conn->dev, NULL, __match_tty)) {
340 device_move(dev, NULL);
341 put_device(dev);
342 }
322 device_del(&conn->dev); 343 device_del(&conn->dev);
344 put_device(&conn->dev);
323} 345}
324 346
325void hci_conn_del_sysfs(struct hci_conn *conn) 347void hci_conn_del_sysfs(struct hci_conn *conn)
@@ -331,7 +353,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
331 353
332 INIT_WORK(&conn->work, del_conn); 354 INIT_WORK(&conn->work, del_conn);
333 355
334 schedule_work(&conn->work); 356 queue_work(btdelconn, &conn->work);
335} 357}
336 358
337int hci_register_sysfs(struct hci_dev *hdev) 359int hci_register_sysfs(struct hci_dev *hdev)
@@ -380,28 +402,54 @@ int __init bt_sysfs_init(void)
380{ 402{
381 int err; 403 int err;
382 404
405 btaddconn = create_singlethread_workqueue("btaddconn");
406 if (!btaddconn) {
407 err = -ENOMEM;
408 goto out;
409 }
410
411 btdelconn = create_singlethread_workqueue("btdelconn");
412 if (!btdelconn) {
413 err = -ENOMEM;
414 goto out_del;
415 }
416
383 bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0); 417 bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0);
384 if (IS_ERR(bt_platform)) 418 if (IS_ERR(bt_platform)) {
385 return PTR_ERR(bt_platform); 419 err = PTR_ERR(bt_platform);
420 goto out_platform;
421 }
386 422
387 err = bus_register(&bt_bus); 423 err = bus_register(&bt_bus);
388 if (err < 0) { 424 if (err < 0)
389 platform_device_unregister(bt_platform); 425 goto out_bus;
390 return err;
391 }
392 426
393 bt_class = class_create(THIS_MODULE, "bluetooth"); 427 bt_class = class_create(THIS_MODULE, "bluetooth");
394 if (IS_ERR(bt_class)) { 428 if (IS_ERR(bt_class)) {
395 bus_unregister(&bt_bus); 429 err = PTR_ERR(bt_class);
396 platform_device_unregister(bt_platform); 430 goto out_class;
397 return PTR_ERR(bt_class);
398 } 431 }
399 432
400 return 0; 433 return 0;
434
435out_class:
436 bus_unregister(&bt_bus);
437out_bus:
438 platform_device_unregister(bt_platform);
439out_platform:
440 destroy_workqueue(btdelconn);
441out_del:
442 destroy_workqueue(btaddconn);
443out:
444 return err;
401} 445}
402 446
403void bt_sysfs_cleanup(void) 447void bt_sysfs_cleanup(void)
404{ 448{
449 destroy_workqueue(btaddconn);
450
451 destroy_workqueue(btdelconn);
452
405 class_destroy(bt_class); 453 class_destroy(bt_class);
406 454
407 bus_unregister(&bt_bus); 455 bus_unregister(&bt_bus);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 4bbacddeb49d..782a22602b86 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -811,10 +811,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
811 session->intr_sock = intr_sock; 811 session->intr_sock = intr_sock;
812 session->state = BT_CONNECTED; 812 session->state = BT_CONNECTED;
813 813
814 init_timer(&session->timer); 814 setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session);
815
816 session->timer.function = hidp_idle_timeout;
817 session->timer.data = (unsigned long) session;
818 815
819 skb_queue_head_init(&session->ctrl_transmit); 816 skb_queue_head_init(&session->ctrl_transmit);
820 skb_queue_head_init(&session->intr_transmit); 817 skb_queue_head_init(&session->intr_transmit);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 1de2b6fbcac0..f4dd02ca9a96 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -86,13 +86,13 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
86 86
87 isock = sockfd_lookup(ca.intr_sock, &err); 87 isock = sockfd_lookup(ca.intr_sock, &err);
88 if (!isock) { 88 if (!isock) {
89 fput(csock->file); 89 sockfd_put(csock);
90 return err; 90 return err;
91 } 91 }
92 92
93 if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) { 93 if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) {
94 fput(csock->file); 94 sockfd_put(csock);
95 fput(isock->file); 95 sockfd_put(isock);
96 return -EBADFD; 96 return -EBADFD;
97 } 97 }
98 98
@@ -101,8 +101,8 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
101 if (copy_to_user(argp, &ca, sizeof(ca))) 101 if (copy_to_user(argp, &ca, sizeof(ca)))
102 err = -EFAULT; 102 err = -EFAULT;
103 } else { 103 } else {
104 fput(csock->file); 104 sockfd_put(csock);
105 fput(isock->file); 105 sockfd_put(isock);
106 } 106 }
107 107
108 return err; 108 return err;
@@ -255,7 +255,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol)
255 if (sock->type != SOCK_RAW) 255 if (sock->type != SOCK_RAW)
256 return -ESOCKTNOSUPPORT; 256 return -ESOCKTNOSUPPORT;
257 257
258 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); 258 sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto);
259 if (!sk) 259 if (!sk)
260 return -ENOMEM; 260 return -ENOMEM;
261 261
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6fbbae78b304..a8811c0a0cea 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -99,13 +99,6 @@ static void l2cap_sock_clear_timer(struct sock *sk)
99 sk_stop_timer(sk, &sk->sk_timer); 99 sk_stop_timer(sk, &sk->sk_timer);
100} 100}
101 101
102static void l2cap_sock_init_timer(struct sock *sk)
103{
104 init_timer(&sk->sk_timer);
105 sk->sk_timer.function = l2cap_sock_timeout;
106 sk->sk_timer.data = (unsigned long)sk;
107}
108
109/* ---- L2CAP channels ---- */ 102/* ---- L2CAP channels ---- */
110static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) 103static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
111{ 104{
@@ -395,9 +388,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
395 388
396 conn->feat_mask = 0; 389 conn->feat_mask = 0;
397 390
398 init_timer(&conn->info_timer); 391 setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn);
399 conn->info_timer.function = l2cap_info_timeout;
400 conn->info_timer.data = (unsigned long) conn;
401 392
402 spin_lock_init(&conn->lock); 393 spin_lock_init(&conn->lock);
403 rwlock_init(&conn->chan_list.lock); 394 rwlock_init(&conn->chan_list.lock);
@@ -607,7 +598,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
607{ 598{
608 struct sock *sk; 599 struct sock *sk;
609 600
610 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); 601 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
611 if (!sk) 602 if (!sk)
612 return NULL; 603 return NULL;
613 604
@@ -622,7 +613,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
622 sk->sk_protocol = proto; 613 sk->sk_protocol = proto;
623 sk->sk_state = BT_OPEN; 614 sk->sk_state = BT_OPEN;
624 615
625 l2cap_sock_init_timer(sk); 616 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk);
626 617
627 bt_sock_link(&l2cap_sk_list, sk); 618 bt_sock_link(&l2cap_sk_list, sk);
628 return sk; 619 return sk;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e7ac6ba7ecab..d3e4e1877e6a 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -279,9 +279,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
279 if (!d) 279 if (!d)
280 return NULL; 280 return NULL;
281 281
282 init_timer(&d->timer); 282 setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
283 d->timer.function = rfcomm_dlc_timeout;
284 d->timer.data = (unsigned long) d;
285 283
286 skb_queue_head_init(&d->tx_queue); 284 skb_queue_head_init(&d->tx_queue);
287 spin_lock_init(&d->lock); 285 spin_lock_init(&d->lock);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 266b6972667d..c46d51035e77 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -287,7 +287,7 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
287 struct rfcomm_dlc *d; 287 struct rfcomm_dlc *d;
288 struct sock *sk; 288 struct sock *sk;
289 289
290 sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); 290 sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto);
291 if (!sk) 291 if (!sk)
292 return NULL; 292 return NULL;
293 293
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index e447651a2dbe..788c70321858 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -95,9 +95,10 @@ static void rfcomm_dev_destruct(struct rfcomm_dev *dev)
95 95
96 BT_DBG("dev %p dlc %p", dev, dlc); 96 BT_DBG("dev %p dlc %p", dev, dlc);
97 97
98 write_lock_bh(&rfcomm_dev_lock); 98 /* Refcount should only hit zero when called from rfcomm_dev_del()
99 list_del_init(&dev->list); 99 which will have taken us off the list. Everything else are
100 write_unlock_bh(&rfcomm_dev_lock); 100 refcounting bugs. */
101 BUG_ON(!list_empty(&dev->list));
101 102
102 rfcomm_dlc_lock(dlc); 103 rfcomm_dlc_lock(dlc);
103 /* Detach DLC if it's owned by this dev */ 104 /* Detach DLC if it's owned by this dev */
@@ -109,11 +110,6 @@ static void rfcomm_dev_destruct(struct rfcomm_dev *dev)
109 110
110 tty_unregister_device(rfcomm_tty_driver, dev->id); 111 tty_unregister_device(rfcomm_tty_driver, dev->id);
111 112
112 /* Refcount should only hit zero when called from rfcomm_dev_del()
113 which will have taken us off the list. Everything else are
114 refcounting bugs. */
115 BUG_ON(!list_empty(&dev->list));
116
117 kfree(dev); 113 kfree(dev);
118 114
119 /* It's safe to call module_put() here because socket still 115 /* It's safe to call module_put() here because socket still
@@ -313,7 +309,15 @@ static void rfcomm_dev_del(struct rfcomm_dev *dev)
313{ 309{
314 BT_DBG("dev %p", dev); 310 BT_DBG("dev %p", dev);
315 311
316 set_bit(RFCOMM_TTY_RELEASED, &dev->flags); 312 if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
313 BUG_ON(1);
314 else
315 set_bit(RFCOMM_TTY_RELEASED, &dev->flags);
316
317 write_lock_bh(&rfcomm_dev_lock);
318 list_del_init(&dev->list);
319 write_unlock_bh(&rfcomm_dev_lock);
320
317 rfcomm_dev_put(dev); 321 rfcomm_dev_put(dev);
318} 322}
319 323
@@ -692,7 +696,8 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
692 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->opened); 696 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->opened);
693 697
694 if (--dev->opened == 0) { 698 if (--dev->opened == 0) {
695 device_move(dev->tty_dev, NULL); 699 if (dev->tty_dev->parent)
700 device_move(dev->tty_dev, NULL);
696 701
697 /* Close DLC and dettach TTY */ 702 /* Close DLC and dettach TTY */
698 rfcomm_dlc_close(dev->dlc, 0); 703 rfcomm_dlc_close(dev->dlc, 0);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 82d0dfdfa7e2..b91d3c81a73c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -97,13 +97,6 @@ static void sco_sock_clear_timer(struct sock *sk)
97 sk_stop_timer(sk, &sk->sk_timer); 97 sk_stop_timer(sk, &sk->sk_timer);
98} 98}
99 99
100static void sco_sock_init_timer(struct sock *sk)
101{
102 init_timer(&sk->sk_timer);
103 sk->sk_timer.function = sco_sock_timeout;
104 sk->sk_timer.data = (unsigned long)sk;
105}
106
107/* ---- SCO connections ---- */ 100/* ---- SCO connections ---- */
108static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) 101static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
109{ 102{
@@ -421,7 +414,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
421{ 414{
422 struct sock *sk; 415 struct sock *sk;
423 416
424 sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); 417 sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto);
425 if (!sk) 418 if (!sk)
426 return NULL; 419 return NULL;
427 420
@@ -436,7 +429,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
436 sk->sk_protocol = proto; 429 sk->sk_protocol = proto;
437 sk->sk_state = BT_OPEN; 430 sk->sk_state = BT_OPEN;
438 431
439 sco_sock_init_timer(sk); 432 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
440 433
441 bt_sock_link(&sco_sk_list, sk); 434 bt_sock_link(&sco_sk_list, sk);
442 return sk; 435 return sk;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 93867bb6cc97..a90182873120 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -39,7 +39,7 @@ static int __init br_init(void)
39 39
40 err = br_fdb_init(); 40 err = br_fdb_init();
41 if (err) 41 if (err)
42 goto err_out1; 42 goto err_out;
43 43
44 err = br_netfilter_init(); 44 err = br_netfilter_init();
45 if (err) 45 if (err)
@@ -65,6 +65,8 @@ err_out3:
65err_out2: 65err_out2:
66 br_netfilter_fini(); 66 br_netfilter_fini();
67err_out1: 67err_out1:
68 br_fdb_fini();
69err_out:
68 llc_sap_put(br_stp_sap); 70 llc_sap_put(br_stp_sap);
69 return err; 71 return err;
70} 72}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index c07bac5e3e10..bf7787395fe0 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -157,8 +157,7 @@ static struct ethtool_ops br_ethtool_ops = {
157 157
158void br_dev_setup(struct net_device *dev) 158void br_dev_setup(struct net_device *dev)
159{ 159{
160 memset(dev->dev_addr, 0, ETH_ALEN); 160 random_ether_addr(dev->dev_addr);
161
162 ether_setup(dev); 161 ether_setup(dev);
163 162
164 dev->do_ioctl = br_dev_ioctl; 163 dev->do_ioctl = br_dev_ioctl;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index eb57502bb264..bc40377136a2 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -44,7 +44,7 @@ int __init br_fdb_init(void)
44 return 0; 44 return 0;
45} 45}
46 46
47void __exit br_fdb_fini(void) 47void br_fdb_fini(void)
48{ 48{
49 kmem_cache_destroy(br_fdb_cache); 49 kmem_cache_destroy(br_fdb_cache);
50} 50}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 935784f736b3..298e0f463c56 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -133,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p)
133 struct net_bridge *br = p->br; 133 struct net_bridge *br = p->br;
134 struct net_device *dev = p->dev; 134 struct net_device *dev = p->dev;
135 135
136 sysfs_remove_link(&br->ifobj, dev->name); 136 sysfs_remove_link(br->ifobj, dev->name);
137 137
138 dev_set_promiscuity(dev, -1); 138 dev_set_promiscuity(dev, -1);
139 139
@@ -258,12 +258,6 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
258 p->state = BR_STATE_DISABLED; 258 p->state = BR_STATE_DISABLED;
259 br_stp_port_timer_init(p); 259 br_stp_port_timer_init(p);
260 260
261 kobject_init(&p->kobj);
262 kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR);
263 p->kobj.ktype = &brport_ktype;
264 p->kobj.parent = &(dev->dev.kobj);
265 p->kobj.kset = NULL;
266
267 return p; 261 return p;
268} 262}
269 263
@@ -379,7 +373,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
379 if (IS_ERR(p)) 373 if (IS_ERR(p))
380 return PTR_ERR(p); 374 return PTR_ERR(p);
381 375
382 err = kobject_add(&p->kobj); 376 err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
377 SYSFS_BRIDGE_PORT_ATTR);
383 if (err) 378 if (err)
384 goto err0; 379 goto err0;
385 380
@@ -416,6 +411,7 @@ err2:
416 br_fdb_delete_by_port(br, p, 1); 411 br_fdb_delete_by_port(br, p, 1);
417err1: 412err1:
418 kobject_del(&p->kobj); 413 kobject_del(&p->kobj);
414 return err;
419err0: 415err0:
420 kobject_put(&p->kobj); 416 kobject_put(&p->kobj);
421 return err; 417 return err;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3cedd4eeeed6..255c00f60ce7 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -109,7 +109,7 @@ static inline int is_link_local(const unsigned char *dest)
109{ 109{
110 __be16 *a = (__be16 *)dest; 110 __be16 *a = (__be16 *)dest;
111 static const __be16 *b = (const __be16 *)br_group_address; 111 static const __be16 *b = (const __be16 *)br_group_address;
112 static const __be16 m = __constant_cpu_to_be16(0xfff0); 112 static const __be16 m = cpu_to_be16(0xfff0);
113 113
114 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; 114 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
115} 115}
@@ -122,6 +122,7 @@ static inline int is_link_local(const unsigned char *dest)
122struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) 122struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
123{ 123{
124 const unsigned char *dest = eth_hdr(skb)->h_dest; 124 const unsigned char *dest = eth_hdr(skb)->h_dest;
125 int (*rhook)(struct sk_buff *skb);
125 126
126 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 127 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
127 goto drop; 128 goto drop;
@@ -147,9 +148,9 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
147 148
148 switch (p->state) { 149 switch (p->state) {
149 case BR_STATE_FORWARDING: 150 case BR_STATE_FORWARDING:
150 151 rhook = rcu_dereference(br_should_route_hook);
151 if (br_should_route_hook) { 152 if (rhook != NULL) {
152 if (br_should_route_hook(skb)) 153 if (rhook(skb))
153 return skb; 154 return skb;
154 dest = eth_hdr(skb)->h_dest; 155 dest = eth_hdr(skb)->h_dest;
155 } 156 }
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index da22f900e89d..1c0efd8ad9f3 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -142,6 +142,23 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
142 return skb->nf_bridge; 142 return skb->nf_bridge;
143} 143}
144 144
145static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
146{
147 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
148
149 if (atomic_read(&nf_bridge->use) > 1) {
150 struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
151
152 if (tmp) {
153 memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
154 atomic_set(&tmp->use, 1);
155 nf_bridge_put(nf_bridge);
156 }
157 nf_bridge = tmp;
158 }
159 return nf_bridge;
160}
161
145static inline void nf_bridge_push_encap_header(struct sk_buff *skb) 162static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
146{ 163{
147 unsigned int len = nf_bridge_encap_header_len(skb); 164 unsigned int len = nf_bridge_encap_header_len(skb);
@@ -247,8 +264,9 @@ static void __br_dnat_complain(void)
247 * Let us first consider the case that ip_route_input() succeeds: 264 * Let us first consider the case that ip_route_input() succeeds:
248 * 265 *
249 * If skb->dst->dev equals the logical bridge device the packet 266 * If skb->dst->dev equals the logical bridge device the packet
250 * came in on, we can consider this bridging. We then call 267 * came in on, we can consider this bridging. The packet is passed
251 * skb->dst->output() which will make the packet enter br_nf_local_out() 268 * through the neighbour output function to build a new destination
269 * MAC address, which will make the packet enter br_nf_local_out()
252 * not much later. In that function it is assured that the iptables 270 * not much later. In that function it is assured that the iptables
253 * FORWARD chain is traversed for the packet. 271 * FORWARD chain is traversed for the packet.
254 * 272 *
@@ -285,12 +303,17 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
285 skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 303 skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
286 304
287 skb->dev = bridge_parent(skb->dev); 305 skb->dev = bridge_parent(skb->dev);
288 if (!skb->dev) 306 if (skb->dev) {
289 kfree_skb(skb); 307 struct dst_entry *dst = skb->dst;
290 else { 308
291 nf_bridge_pull_encap_header(skb); 309 nf_bridge_pull_encap_header(skb);
292 skb->dst->output(skb); 310
311 if (dst->hh)
312 return neigh_hh_output(dst->hh, skb);
313 else if (dst->neighbour)
314 return dst->neighbour->output(skb);
293 } 315 }
316 kfree_skb(skb);
294 return 0; 317 return 0;
295} 318}
296 319
@@ -330,7 +353,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
330 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 353 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
331 goto free_skb; 354 goto free_skb;
332 355
333 if (!ip_route_output_key(&rt, &fl)) { 356 if (!ip_route_output_key(&init_net, &rt, &fl)) {
334 /* - Bridged-and-DNAT'ed traffic doesn't 357 /* - Bridged-and-DNAT'ed traffic doesn't
335 * require ip_forwarding. */ 358 * require ip_forwarding. */
336 if (((struct dst_entry *)rt)->dev == dev) { 359 if (((struct dst_entry *)rt)->dev == dev) {
@@ -488,7 +511,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
488 if (!setup_pre_routing(skb)) 511 if (!setup_pre_routing(skb))
489 return NF_DROP; 512 return NF_DROP;
490 513
491 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, 514 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
492 br_nf_pre_routing_finish_ipv6); 515 br_nf_pre_routing_finish_ipv6);
493 516
494 return NF_STOLEN; 517 return NF_STOLEN;
@@ -561,7 +584,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
561 return NF_DROP; 584 return NF_DROP;
562 store_orig_dstaddr(skb); 585 store_orig_dstaddr(skb);
563 586
564 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, 587 NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
565 br_nf_pre_routing_finish); 588 br_nf_pre_routing_finish);
566 589
567 return NF_STOLEN; 590 return NF_STOLEN;
@@ -631,6 +654,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
631 if (!skb->nf_bridge) 654 if (!skb->nf_bridge)
632 return NF_ACCEPT; 655 return NF_ACCEPT;
633 656
657 /* Need exclusive nf_bridge_info since we might have multiple
658 * different physoutdevs. */
659 if (!nf_bridge_unshare(skb))
660 return NF_DROP;
661
634 parent = bridge_parent(out); 662 parent = bridge_parent(out);
635 if (!parent) 663 if (!parent)
636 return NF_DROP; 664 return NF_DROP;
@@ -653,7 +681,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
653 nf_bridge->mask |= BRNF_BRIDGED; 681 nf_bridge->mask |= BRNF_BRIDGED;
654 nf_bridge->physoutdev = skb->dev; 682 nf_bridge->physoutdev = skb->dev;
655 683
656 NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent, 684 NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
657 br_nf_forward_finish); 685 br_nf_forward_finish);
658 686
659 return NF_STOLEN; 687 return NF_STOLEN;
@@ -712,6 +740,11 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
712 if (!skb->nf_bridge) 740 if (!skb->nf_bridge)
713 return NF_ACCEPT; 741 return NF_ACCEPT;
714 742
743 /* Need exclusive nf_bridge_info since we might have multiple
744 * different physoutdevs. */
745 if (!nf_bridge_unshare(skb))
746 return NF_DROP;
747
715 nf_bridge = skb->nf_bridge; 748 nf_bridge = skb->nf_bridge;
716 if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) 749 if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
717 return NF_ACCEPT; 750 return NF_ACCEPT;
@@ -766,6 +799,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
766 if (!nf_bridge) 799 if (!nf_bridge)
767 return NF_ACCEPT; 800 return NF_ACCEPT;
768 801
802 if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
803 return NF_ACCEPT;
804
769 if (!realoutdev) 805 if (!realoutdev)
770 return NF_DROP; 806 return NF_DROP;
771 807
@@ -792,11 +828,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
792 nf_bridge_pull_encap_header(skb); 828 nf_bridge_pull_encap_header(skb);
793 nf_bridge_save_header(skb); 829 nf_bridge_save_header(skb);
794 830
795#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) 831 NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
796 if (nf_bridge->netoutdev)
797 realoutdev = nf_bridge->netoutdev;
798#endif
799 NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
800 br_nf_dev_queue_xmit); 832 br_nf_dev_queue_xmit);
801 833
802 return NF_STOLEN; 834 return NF_STOLEN;
@@ -835,7 +867,7 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
835 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. 867 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
836 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 868 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
837 * ip_refrag() can return NF_STOLEN. */ 869 * ip_refrag() can return NF_STOLEN. */
838static struct nf_hook_ops br_nf_ops[] = { 870static struct nf_hook_ops br_nf_ops[] __read_mostly = {
839 { .hook = br_nf_pre_routing, 871 { .hook = br_nf_pre_routing,
840 .owner = THIS_MODULE, 872 .owner = THIS_MODULE,
841 .pf = PF_BRIDGE, 873 .pf = PF_BRIDGE,
@@ -869,12 +901,12 @@ static struct nf_hook_ops br_nf_ops[] = {
869 { .hook = ip_sabotage_in, 901 { .hook = ip_sabotage_in,
870 .owner = THIS_MODULE, 902 .owner = THIS_MODULE,
871 .pf = PF_INET, 903 .pf = PF_INET,
872 .hooknum = NF_IP_PRE_ROUTING, 904 .hooknum = NF_INET_PRE_ROUTING,
873 .priority = NF_IP_PRI_FIRST, }, 905 .priority = NF_IP_PRI_FIRST, },
874 { .hook = ip_sabotage_in, 906 { .hook = ip_sabotage_in,
875 .owner = THIS_MODULE, 907 .owner = THIS_MODULE,
876 .pf = PF_INET6, 908 .pf = PF_INET6,
877 .hooknum = NF_IP6_PRE_ROUTING, 909 .hooknum = NF_INET_PRE_ROUTING,
878 .priority = NF_IP6_PRI_FIRST, }, 910 .priority = NF_IP6_PRI_FIRST, },
879}; 911};
880 912
@@ -931,24 +963,10 @@ static ctl_table brnf_table[] = {
931 { .ctl_name = 0 } 963 { .ctl_name = 0 }
932}; 964};
933 965
934static ctl_table brnf_bridge_table[] = { 966static struct ctl_path brnf_path[] = {
935 { 967 { .procname = "net", .ctl_name = CTL_NET, },
936 .ctl_name = NET_BRIDGE, 968 { .procname = "bridge", .ctl_name = NET_BRIDGE, },
937 .procname = "bridge", 969 { }
938 .mode = 0555,
939 .child = brnf_table,
940 },
941 { .ctl_name = 0 }
942};
943
944static ctl_table brnf_net_table[] = {
945 {
946 .ctl_name = CTL_NET,
947 .procname = "net",
948 .mode = 0555,
949 .child = brnf_bridge_table,
950 },
951 { .ctl_name = 0 }
952}; 970};
953#endif 971#endif
954 972
@@ -960,7 +978,7 @@ int __init br_netfilter_init(void)
960 if (ret < 0) 978 if (ret < 0)
961 return ret; 979 return ret;
962#ifdef CONFIG_SYSCTL 980#ifdef CONFIG_SYSCTL
963 brnf_sysctl_header = register_sysctl_table(brnf_net_table); 981 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
964 if (brnf_sysctl_header == NULL) { 982 if (brnf_sysctl_header == NULL) {
965 printk(KERN_WARNING 983 printk(KERN_WARNING
966 "br_netfilter: can't register to sysctl.\n"); 984 "br_netfilter: can't register to sysctl.\n");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53ab8e0cb518..f5d69336d97b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <net/rtnetlink.h> 14#include <net/rtnetlink.h>
15#include <net/net_namespace.h> 15#include <net/net_namespace.h>
16#include <net/sock.h>
16#include "br_private.h" 17#include "br_private.h"
17 18
18static inline size_t br_nlmsg_size(void) 19static inline size_t br_nlmsg_size(void)
@@ -96,10 +97,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
96 kfree_skb(skb); 97 kfree_skb(skb);
97 goto errout; 98 goto errout;
98 } 99 }
99 err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 100 err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
100errout: 101errout:
101 if (err < 0) 102 if (err < 0)
102 rtnl_set_sk_err(RTNLGRP_LINK, err); 103 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
103} 104}
104 105
105/* 106/*
@@ -107,9 +108,13 @@ errout:
107 */ 108 */
108static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 109static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
109{ 110{
111 struct net *net = skb->sk->sk_net;
110 struct net_device *dev; 112 struct net_device *dev;
111 int idx; 113 int idx;
112 114
115 if (net != &init_net)
116 return 0;
117
113 idx = 0; 118 idx = 0;
114 for_each_netdev(&init_net, dev) { 119 for_each_netdev(&init_net, dev) {
115 /* not a bridge port */ 120 /* not a bridge port */
@@ -135,12 +140,16 @@ skip:
135 */ 140 */
136static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 141static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
137{ 142{
143 struct net *net = skb->sk->sk_net;
138 struct ifinfomsg *ifm; 144 struct ifinfomsg *ifm;
139 struct nlattr *protinfo; 145 struct nlattr *protinfo;
140 struct net_device *dev; 146 struct net_device *dev;
141 struct net_bridge_port *p; 147 struct net_bridge_port *p;
142 u8 new_state; 148 u8 new_state;
143 149
150 if (net != &init_net)
151 return -EINVAL;
152
144 if (nlmsg_len(nlh) < sizeof(*ifm)) 153 if (nlmsg_len(nlh) < sizeof(*ifm))
145 return -EINVAL; 154 return -EINVAL;
146 155
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f666f7b28ff5..c11b554fd109 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -124,7 +124,7 @@ struct net_bridge
124 struct timer_list tcn_timer; 124 struct timer_list tcn_timer;
125 struct timer_list topology_change_timer; 125 struct timer_list topology_change_timer;
126 struct timer_list gc_timer; 126 struct timer_list gc_timer;
127 struct kobject ifobj; 127 struct kobject *ifobj;
128}; 128};
129 129
130extern struct notifier_block br_device_notifier; 130extern struct notifier_block br_device_notifier;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 3312e8f2abe4..9cf0538d1717 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -426,16 +426,10 @@ int br_sysfs_addbr(struct net_device *dev)
426 goto out2; 426 goto out2;
427 } 427 }
428 428
429 429 br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj);
430 kobject_set_name(&br->ifobj, SYSFS_BRIDGE_PORT_SUBDIR); 430 if (!br->ifobj) {
431 br->ifobj.ktype = NULL;
432 br->ifobj.kset = NULL;
433 br->ifobj.parent = brobj;
434
435 err = kobject_register(&br->ifobj);
436 if (err) {
437 pr_info("%s: can't add kobject (directory) %s/%s\n", 431 pr_info("%s: can't add kobject (directory) %s/%s\n",
438 __FUNCTION__, dev->name, kobject_name(&br->ifobj)); 432 __FUNCTION__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR);
439 goto out3; 433 goto out3;
440 } 434 }
441 return 0; 435 return 0;
@@ -453,7 +447,7 @@ void br_sysfs_delbr(struct net_device *dev)
453 struct kobject *kobj = &dev->dev.kobj; 447 struct kobject *kobj = &dev->dev.kobj;
454 struct net_bridge *br = netdev_priv(dev); 448 struct net_bridge *br = netdev_priv(dev);
455 449
456 kobject_unregister(&br->ifobj); 450 kobject_put(br->ifobj);
457 sysfs_remove_bin_file(kobj, &bridge_forward); 451 sysfs_remove_bin_file(kobj, &bridge_forward);
458 sysfs_remove_group(kobj, &bridge_group); 452 sysfs_remove_group(kobj, &bridge_group);
459} 453}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 79db51fcb476..02b2d50cce4d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -229,7 +229,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
229 goto out2; 229 goto out2;
230 } 230 }
231 231
232 err= sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); 232 err = sysfs_create_link(br->ifobj, &p->kobj, p->dev->name);
233out2: 233out2:
234 return err; 234 return err;
235} 235}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index b84fc6075fe1..4a3e2bf892c7 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menu "Bridge: Netfilter Configuration" 5menu "Bridge: Netfilter Configuration"
6 depends on BRIDGE && NETFILTER 6 depends on BRIDGE && BRIDGE_NETFILTER
7 7
8config BRIDGE_NF_EBTABLES 8config BRIDGE_NF_EBTABLES
9 tristate "Ethernet Bridge tables (ebtables) support" 9 tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 41a78072cd0e..98534025360f 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -15,8 +15,8 @@
15static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, 15static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in,
16 const struct net_device *out, const void *data, unsigned int datalen) 16 const struct net_device *out, const void *data, unsigned int datalen)
17{ 17{
18 struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; 18 const struct ebt_802_3_info *info = data;
19 struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); 19 const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
20 __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; 20 __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
21 21
22 if (info->bitmask & EBT_802_3_SAP) { 22 if (info->bitmask & EBT_802_3_SAP) {
@@ -40,7 +40,7 @@ static struct ebt_match filter_802_3;
40static int ebt_802_3_check(const char *tablename, unsigned int hookmask, 40static int ebt_802_3_check(const char *tablename, unsigned int hookmask,
41 const struct ebt_entry *e, void *data, unsigned int datalen) 41 const struct ebt_entry *e, void *data, unsigned int datalen)
42{ 42{
43 struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; 43 const struct ebt_802_3_info *info = data;
44 44
45 if (datalen < sizeof(struct ebt_802_3_info)) 45 if (datalen < sizeof(struct ebt_802_3_info))
46 return -EINVAL; 46 return -EINVAL;
@@ -50,8 +50,7 @@ static int ebt_802_3_check(const char *tablename, unsigned int hookmask,
50 return 0; 50 return 0;
51} 51}
52 52
53static struct ebt_match filter_802_3 = 53static struct ebt_match filter_802_3 __read_mostly = {
54{
55 .name = EBT_802_3_MATCH, 54 .name = EBT_802_3_MATCH,
56 .match = ebt_filter_802_3, 55 .match = ebt_filter_802_3,
57 .check = ebt_802_3_check, 56 .check = ebt_802_3_check,
@@ -70,4 +69,5 @@ static void __exit ebt_802_3_fini(void)
70 69
71module_init(ebt_802_3_init); 70module_init(ebt_802_3_init);
72module_exit(ebt_802_3_fini); 71module_exit(ebt_802_3_fini);
72MODULE_DESCRIPTION("Ebtables: DSAP/SSAP field and SNAP type matching");
73MODULE_LICENSE("GPL"); 73MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 392d877040d3..70b6dca5ea75 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -25,7 +25,7 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
25 const struct ebt_mac_wormhash_tuple *p; 25 const struct ebt_mac_wormhash_tuple *p;
26 int start, limit, i; 26 int start, limit, i;
27 uint32_t cmp[2] = { 0, 0 }; 27 uint32_t cmp[2] = { 0, 0 };
28 int key = (const unsigned char) mac[5]; 28 int key = ((const unsigned char *)mac)[5];
29 29
30 memcpy(((char *) cmp) + 2, mac, 6); 30 memcpy(((char *) cmp) + 2, mac, 6);
31 start = wh->table[key]; 31 start = wh->table[key];
@@ -73,15 +73,18 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash
73static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) 73static int get_ip_dst(const struct sk_buff *skb, __be32 *addr)
74{ 74{
75 if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { 75 if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
76 struct iphdr _iph, *ih; 76 const struct iphdr *ih;
77 struct iphdr _iph;
77 78
78 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); 79 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
79 if (ih == NULL) 80 if (ih == NULL)
80 return -1; 81 return -1;
81 *addr = ih->daddr; 82 *addr = ih->daddr;
82 } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { 83 } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
83 struct arphdr _arph, *ah; 84 const struct arphdr *ah;
84 __be32 buf, *bp; 85 struct arphdr _arph;
86 const __be32 *bp;
87 __be32 buf;
85 88
86 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); 89 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
87 if (ah == NULL || 90 if (ah == NULL ||
@@ -101,15 +104,18 @@ static int get_ip_dst(const struct sk_buff *skb, __be32 *addr)
101static int get_ip_src(const struct sk_buff *skb, __be32 *addr) 104static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
102{ 105{
103 if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { 106 if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
104 struct iphdr _iph, *ih; 107 const struct iphdr *ih;
108 struct iphdr _iph;
105 109
106 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); 110 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
107 if (ih == NULL) 111 if (ih == NULL)
108 return -1; 112 return -1;
109 *addr = ih->saddr; 113 *addr = ih->saddr;
110 } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { 114 } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
111 struct arphdr _arph, *ah; 115 const struct arphdr *ah;
112 __be32 buf, *bp; 116 struct arphdr _arph;
117 const __be32 *bp;
118 __be32 buf;
113 119
114 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); 120 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
115 if (ah == NULL || 121 if (ah == NULL ||
@@ -130,7 +136,7 @@ static int ebt_filter_among(const struct sk_buff *skb,
130 const struct net_device *out, const void *data, 136 const struct net_device *out, const void *data,
131 unsigned int datalen) 137 unsigned int datalen)
132{ 138{
133 struct ebt_among_info *info = (struct ebt_among_info *) data; 139 const struct ebt_among_info *info = data;
134 const char *dmac, *smac; 140 const char *dmac, *smac;
135 const struct ebt_mac_wormhash *wh_dst, *wh_src; 141 const struct ebt_mac_wormhash *wh_dst, *wh_src;
136 __be32 dip = 0, sip = 0; 142 __be32 dip = 0, sip = 0;
@@ -175,7 +181,7 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask,
175 const struct ebt_entry *e, void *data, 181 const struct ebt_entry *e, void *data,
176 unsigned int datalen) 182 unsigned int datalen)
177{ 183{
178 struct ebt_among_info *info = (struct ebt_among_info *) data; 184 const struct ebt_among_info *info = data;
179 int expected_length = sizeof(struct ebt_among_info); 185 int expected_length = sizeof(struct ebt_among_info);
180 const struct ebt_mac_wormhash *wh_dst, *wh_src; 186 const struct ebt_mac_wormhash *wh_dst, *wh_src;
181 int err; 187 int err;
@@ -187,7 +193,7 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask,
187 193
188 if (datalen != EBT_ALIGN(expected_length)) { 194 if (datalen != EBT_ALIGN(expected_length)) {
189 printk(KERN_WARNING 195 printk(KERN_WARNING
190 "ebtables: among: wrong size: %d" 196 "ebtables: among: wrong size: %d "
191 "against expected %d, rounded to %Zd\n", 197 "against expected %d, rounded to %Zd\n",
192 datalen, expected_length, 198 datalen, expected_length,
193 EBT_ALIGN(expected_length)); 199 EBT_ALIGN(expected_length));
@@ -206,7 +212,7 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask,
206 return 0; 212 return 0;
207} 213}
208 214
209static struct ebt_match filter_among = { 215static struct ebt_match filter_among __read_mostly = {
210 .name = EBT_AMONG_MATCH, 216 .name = EBT_AMONG_MATCH,
211 .match = ebt_filter_among, 217 .match = ebt_filter_among,
212 .check = ebt_among_check, 218 .check = ebt_among_check,
@@ -225,4 +231,5 @@ static void __exit ebt_among_fini(void)
225 231
226module_init(ebt_among_init); 232module_init(ebt_among_init);
227module_exit(ebt_among_fini); 233module_exit(ebt_among_fini);
234MODULE_DESCRIPTION("Ebtables: Combined MAC/IP address list matching");
228MODULE_LICENSE("GPL"); 235MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 1a46952a56d9..7c535be75665 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -18,8 +18,9 @@
18static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, 18static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in,
19 const struct net_device *out, const void *data, unsigned int datalen) 19 const struct net_device *out, const void *data, unsigned int datalen)
20{ 20{
21 struct ebt_arp_info *info = (struct ebt_arp_info *)data; 21 const struct ebt_arp_info *info = data;
22 struct arphdr _arph, *ah; 22 const struct arphdr *ah;
23 struct arphdr _arph;
23 24
24 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); 25 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
25 if (ah == NULL) 26 if (ah == NULL)
@@ -34,8 +35,9 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
34 ah->ar_pro, EBT_ARP_PTYPE)) 35 ah->ar_pro, EBT_ARP_PTYPE))
35 return EBT_NOMATCH; 36 return EBT_NOMATCH;
36 37
37 if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { 38 if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) {
38 __be32 saddr, daddr, *sap, *dap; 39 const __be32 *sap, *dap;
40 __be32 saddr, daddr;
39 41
40 if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) 42 if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
41 return EBT_NOMATCH; 43 return EBT_NOMATCH;
@@ -61,7 +63,8 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
61 } 63 }
62 64
63 if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { 65 if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
64 unsigned char _mac[ETH_ALEN], *mp; 66 const unsigned char *mp;
67 unsigned char _mac[ETH_ALEN];
65 uint8_t verdict, i; 68 uint8_t verdict, i;
66 69
67 if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) 70 if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
@@ -100,7 +103,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
100static int ebt_arp_check(const char *tablename, unsigned int hookmask, 103static int ebt_arp_check(const char *tablename, unsigned int hookmask,
101 const struct ebt_entry *e, void *data, unsigned int datalen) 104 const struct ebt_entry *e, void *data, unsigned int datalen)
102{ 105{
103 struct ebt_arp_info *info = (struct ebt_arp_info *)data; 106 const struct ebt_arp_info *info = data;
104 107
105 if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) 108 if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info)))
106 return -EINVAL; 109 return -EINVAL;
@@ -113,8 +116,7 @@ static int ebt_arp_check(const char *tablename, unsigned int hookmask,
113 return 0; 116 return 0;
114} 117}
115 118
116static struct ebt_match filter_arp = 119static struct ebt_match filter_arp __read_mostly = {
117{
118 .name = EBT_ARP_MATCH, 120 .name = EBT_ARP_MATCH,
119 .match = ebt_filter_arp, 121 .match = ebt_filter_arp,
120 .check = ebt_arp_check, 122 .check = ebt_arp_check,
@@ -133,4 +135,5 @@ static void __exit ebt_arp_fini(void)
133 135
134module_init(ebt_arp_init); 136module_init(ebt_arp_init);
135module_exit(ebt_arp_fini); 137module_exit(ebt_arp_fini);
138MODULE_DESCRIPTION("Ebtables: ARP protocol packet match");
136MODULE_LICENSE("GPL"); 139MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 48a80e423287..0c4279590fc7 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -19,10 +19,13 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
19 const struct net_device *in, const struct net_device *out, 19 const struct net_device *in, const struct net_device *out,
20 const void *data, unsigned int datalen) 20 const void *data, unsigned int datalen)
21{ 21{
22 struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; 22 struct ebt_arpreply_info *info = (void *)data;
23 __be32 _sip, *siptr, _dip, *diptr; 23 const __be32 *siptr, *diptr;
24 struct arphdr _ah, *ap; 24 __be32 _sip, _dip;
25 unsigned char _sha[ETH_ALEN], *shp; 25 const struct arphdr *ap;
26 struct arphdr _ah;
27 const unsigned char *shp;
28 unsigned char _sha[ETH_ALEN];
26 29
27 ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); 30 ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
28 if (ap == NULL) 31 if (ap == NULL)
@@ -58,7 +61,7 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
58static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, 61static int ebt_target_reply_check(const char *tablename, unsigned int hookmask,
59 const struct ebt_entry *e, void *data, unsigned int datalen) 62 const struct ebt_entry *e, void *data, unsigned int datalen)
60{ 63{
61 struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; 64 const struct ebt_arpreply_info *info = data;
62 65
63 if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) 66 if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info)))
64 return -EINVAL; 67 return -EINVAL;
@@ -73,8 +76,7 @@ static int ebt_target_reply_check(const char *tablename, unsigned int hookmask,
73 return 0; 76 return 0;
74} 77}
75 78
76static struct ebt_target reply_target = 79static struct ebt_target reply_target __read_mostly = {
77{
78 .name = EBT_ARPREPLY_TARGET, 80 .name = EBT_ARPREPLY_TARGET,
79 .target = ebt_target_reply, 81 .target = ebt_target_reply,
80 .check = ebt_target_reply_check, 82 .check = ebt_target_reply_check,
@@ -93,4 +95,5 @@ static void __exit ebt_arpreply_fini(void)
93 95
94module_init(ebt_arpreply_init); 96module_init(ebt_arpreply_init);
95module_exit(ebt_arpreply_fini); 97module_exit(ebt_arpreply_fini);
98MODULE_DESCRIPTION("Ebtables: ARP reply target");
96MODULE_LICENSE("GPL"); 99MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 74262e9a566a..e700cbf634c2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -18,7 +18,7 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
18 const struct net_device *in, const struct net_device *out, 18 const struct net_device *in, const struct net_device *out,
19 const void *data, unsigned int datalen) 19 const void *data, unsigned int datalen)
20{ 20{
21 struct ebt_nat_info *info = (struct ebt_nat_info *)data; 21 const struct ebt_nat_info *info = data;
22 22
23 if (skb_make_writable(skb, 0)) 23 if (skb_make_writable(skb, 0))
24 return NF_DROP; 24 return NF_DROP;
@@ -30,7 +30,7 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
30static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, 30static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
31 const struct ebt_entry *e, void *data, unsigned int datalen) 31 const struct ebt_entry *e, void *data, unsigned int datalen)
32{ 32{
33 struct ebt_nat_info *info = (struct ebt_nat_info *)data; 33 const struct ebt_nat_info *info = data;
34 34
35 if (BASE_CHAIN && info->target == EBT_RETURN) 35 if (BASE_CHAIN && info->target == EBT_RETURN)
36 return -EINVAL; 36 return -EINVAL;
@@ -46,8 +46,7 @@ static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
46 return 0; 46 return 0;
47} 47}
48 48
49static struct ebt_target dnat = 49static struct ebt_target dnat __read_mostly = {
50{
51 .name = EBT_DNAT_TARGET, 50 .name = EBT_DNAT_TARGET,
52 .target = ebt_target_dnat, 51 .target = ebt_target_dnat,
53 .check = ebt_target_dnat_check, 52 .check = ebt_target_dnat_check,
@@ -66,4 +65,5 @@ static void __exit ebt_dnat_fini(void)
66 65
67module_init(ebt_dnat_init); 66module_init(ebt_dnat_init);
68module_exit(ebt_dnat_fini); 67module_exit(ebt_dnat_fini);
68MODULE_DESCRIPTION("Ebtables: Destination MAC address translation");
69MODULE_LICENSE("GPL"); 69MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 69f7f0ab9c76..65caa00dcf2a 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -28,9 +28,11 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
28 const struct net_device *out, const void *data, 28 const struct net_device *out, const void *data,
29 unsigned int datalen) 29 unsigned int datalen)
30{ 30{
31 struct ebt_ip_info *info = (struct ebt_ip_info *)data; 31 const struct ebt_ip_info *info = data;
32 struct iphdr _iph, *ih; 32 const struct iphdr *ih;
33 struct tcpudphdr _ports, *pptr; 33 struct iphdr _iph;
34 const struct tcpudphdr *pptr;
35 struct tcpudphdr _ports;
34 36
35 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); 37 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
36 if (ih == NULL) 38 if (ih == NULL)
@@ -79,7 +81,7 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
79static int ebt_ip_check(const char *tablename, unsigned int hookmask, 81static int ebt_ip_check(const char *tablename, unsigned int hookmask,
80 const struct ebt_entry *e, void *data, unsigned int datalen) 82 const struct ebt_entry *e, void *data, unsigned int datalen)
81{ 83{
82 struct ebt_ip_info *info = (struct ebt_ip_info *)data; 84 const struct ebt_ip_info *info = data;
83 85
84 if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) 86 if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info)))
85 return -EINVAL; 87 return -EINVAL;
@@ -105,8 +107,7 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask,
105 return 0; 107 return 0;
106} 108}
107 109
108static struct ebt_match filter_ip = 110static struct ebt_match filter_ip __read_mostly = {
109{
110 .name = EBT_IP_MATCH, 111 .name = EBT_IP_MATCH,
111 .match = ebt_filter_ip, 112 .match = ebt_filter_ip,
112 .check = ebt_ip_check, 113 .check = ebt_ip_check,
@@ -125,4 +126,5 @@ static void __exit ebt_ip_fini(void)
125 126
126module_init(ebt_ip_init); 127module_init(ebt_ip_init);
127module_exit(ebt_ip_fini); 128module_exit(ebt_ip_fini);
129MODULE_DESCRIPTION("Ebtables: IPv4 protocol packet match");
128MODULE_LICENSE("GPL"); 130MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index d48fa5cb26cf..8cbdc01c253e 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -69,7 +69,7 @@ user2credits(u_int32_t user)
69static int ebt_limit_check(const char *tablename, unsigned int hookmask, 69static int ebt_limit_check(const char *tablename, unsigned int hookmask,
70 const struct ebt_entry *e, void *data, unsigned int datalen) 70 const struct ebt_entry *e, void *data, unsigned int datalen)
71{ 71{
72 struct ebt_limit_info *info = (struct ebt_limit_info *)data; 72 struct ebt_limit_info *info = data;
73 73
74 if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) 74 if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info)))
75 return -EINVAL; 75 return -EINVAL;
@@ -90,8 +90,7 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask,
90 return 0; 90 return 0;
91} 91}
92 92
93static struct ebt_match ebt_limit_reg = 93static struct ebt_match ebt_limit_reg __read_mostly = {
94{
95 .name = EBT_LIMIT_MATCH, 94 .name = EBT_LIMIT_MATCH,
96 .match = ebt_limit_match, 95 .match = ebt_limit_match,
97 .check = ebt_limit_check, 96 .check = ebt_limit_check,
@@ -110,4 +109,5 @@ static void __exit ebt_limit_fini(void)
110 109
111module_init(ebt_limit_init); 110module_init(ebt_limit_init);
112module_exit(ebt_limit_fini); 111module_exit(ebt_limit_fini);
112MODULE_DESCRIPTION("Ebtables: Rate-limit match");
113MODULE_LICENSE("GPL"); 113MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 457815fb5584..0b209e4aad0a 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -17,13 +17,14 @@
17#include <linux/in.h> 17#include <linux/in.h>
18#include <linux/if_arp.h> 18#include <linux/if_arp.h>
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <net/netfilter/nf_log.h>
20 21
21static DEFINE_SPINLOCK(ebt_log_lock); 22static DEFINE_SPINLOCK(ebt_log_lock);
22 23
23static int ebt_log_check(const char *tablename, unsigned int hookmask, 24static int ebt_log_check(const char *tablename, unsigned int hookmask,
24 const struct ebt_entry *e, void *data, unsigned int datalen) 25 const struct ebt_entry *e, void *data, unsigned int datalen)
25{ 26{
26 struct ebt_log_info *info = (struct ebt_log_info *)data; 27 struct ebt_log_info *info = data;
27 28
28 if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) 29 if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info)))
29 return -EINVAL; 30 return -EINVAL;
@@ -49,7 +50,7 @@ struct arppayload
49 unsigned char ip_dst[4]; 50 unsigned char ip_dst[4];
50}; 51};
51 52
52static void print_MAC(unsigned char *p) 53static void print_MAC(const unsigned char *p)
53{ 54{
54 int i; 55 int i;
55 56
@@ -83,7 +84,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
83 84
84 if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == 85 if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
85 htons(ETH_P_IP)){ 86 htons(ETH_P_IP)){
86 struct iphdr _iph, *ih; 87 const struct iphdr *ih;
88 struct iphdr _iph;
87 89
88 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); 90 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
89 if (ih == NULL) { 91 if (ih == NULL) {
@@ -98,7 +100,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
98 ih->protocol == IPPROTO_UDPLITE || 100 ih->protocol == IPPROTO_UDPLITE ||
99 ih->protocol == IPPROTO_SCTP || 101 ih->protocol == IPPROTO_SCTP ||
100 ih->protocol == IPPROTO_DCCP) { 102 ih->protocol == IPPROTO_DCCP) {
101 struct tcpudphdr _ports, *pptr; 103 const struct tcpudphdr *pptr;
104 struct tcpudphdr _ports;
102 105
103 pptr = skb_header_pointer(skb, ih->ihl*4, 106 pptr = skb_header_pointer(skb, ih->ihl*4,
104 sizeof(_ports), &_ports); 107 sizeof(_ports), &_ports);
@@ -115,7 +118,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
115 if ((bitmask & EBT_LOG_ARP) && 118 if ((bitmask & EBT_LOG_ARP) &&
116 ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || 119 ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
117 (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { 120 (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) {
118 struct arphdr _arph, *ah; 121 const struct arphdr *ah;
122 struct arphdr _arph;
119 123
120 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); 124 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
121 if (ah == NULL) { 125 if (ah == NULL) {
@@ -131,7 +135,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
131 if (ah->ar_hrd == htons(1) && 135 if (ah->ar_hrd == htons(1) &&
132 ah->ar_hln == ETH_ALEN && 136 ah->ar_hln == ETH_ALEN &&
133 ah->ar_pln == sizeof(__be32)) { 137 ah->ar_pln == sizeof(__be32)) {
134 struct arppayload _arpp, *ap; 138 const struct arppayload *ap;
139 struct arppayload _arpp;
135 140
136 ap = skb_header_pointer(skb, sizeof(_arph), 141 ap = skb_header_pointer(skb, sizeof(_arph),
137 sizeof(_arpp), &_arpp); 142 sizeof(_arpp), &_arpp);
@@ -159,7 +164,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
159 const struct net_device *in, const struct net_device *out, 164 const struct net_device *in, const struct net_device *out,
160 const void *data, unsigned int datalen) 165 const void *data, unsigned int datalen)
161{ 166{
162 struct ebt_log_info *info = (struct ebt_log_info *)data; 167 const struct ebt_log_info *info = data;
163 struct nf_loginfo li; 168 struct nf_loginfo li;
164 169
165 li.type = NF_LOG_TYPE_LOG; 170 li.type = NF_LOG_TYPE_LOG;
@@ -182,7 +187,7 @@ static struct ebt_watcher log =
182 .me = THIS_MODULE, 187 .me = THIS_MODULE,
183}; 188};
184 189
185static struct nf_logger ebt_log_logger = { 190static const struct nf_logger ebt_log_logger = {
186 .name = "ebt_log", 191 .name = "ebt_log",
187 .logfn = &ebt_log_packet, 192 .logfn = &ebt_log_packet,
188 .me = THIS_MODULE, 193 .me = THIS_MODULE,
@@ -207,4 +212,5 @@ static void __exit ebt_log_fini(void)
207 212
208module_init(ebt_log_init); 213module_init(ebt_log_init);
209module_exit(ebt_log_fini); 214module_exit(ebt_log_fini);
215MODULE_DESCRIPTION("Ebtables: Packet logging to syslog");
210MODULE_LICENSE("GPL"); 216MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 6cba54309c09..36723f47db0a 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -21,7 +21,7 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
21 const struct net_device *in, const struct net_device *out, 21 const struct net_device *in, const struct net_device *out,
22 const void *data, unsigned int datalen) 22 const void *data, unsigned int datalen)
23{ 23{
24 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; 24 const struct ebt_mark_t_info *info = data;
25 int action = info->target & -16; 25 int action = info->target & -16;
26 26
27 if (action == MARK_SET_VALUE) 27 if (action == MARK_SET_VALUE)
@@ -39,7 +39,7 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
39static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, 39static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
40 const struct ebt_entry *e, void *data, unsigned int datalen) 40 const struct ebt_entry *e, void *data, unsigned int datalen)
41{ 41{
42 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; 42 const struct ebt_mark_t_info *info = data;
43 int tmp; 43 int tmp;
44 44
45 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) 45 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
@@ -57,8 +57,7 @@ static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
57 return 0; 57 return 0;
58} 58}
59 59
60static struct ebt_target mark_target = 60static struct ebt_target mark_target __read_mostly = {
61{
62 .name = EBT_MARK_TARGET, 61 .name = EBT_MARK_TARGET,
63 .target = ebt_target_mark, 62 .target = ebt_target_mark,
64 .check = ebt_target_mark_check, 63 .check = ebt_target_mark_check,
@@ -77,4 +76,5 @@ static void __exit ebt_mark_fini(void)
77 76
78module_init(ebt_mark_init); 77module_init(ebt_mark_init);
79module_exit(ebt_mark_fini); 78module_exit(ebt_mark_fini);
79MODULE_DESCRIPTION("Ebtables: Packet mark modification");
80MODULE_LICENSE("GPL"); 80MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 6b0d2169af74..9b0a4543861f 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -16,7 +16,7 @@ static int ebt_filter_mark(const struct sk_buff *skb,
16 const struct net_device *in, const struct net_device *out, const void *data, 16 const struct net_device *in, const struct net_device *out, const void *data,
17 unsigned int datalen) 17 unsigned int datalen)
18{ 18{
19 struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; 19 const struct ebt_mark_m_info *info = data;
20 20
21 if (info->bitmask & EBT_MARK_OR) 21 if (info->bitmask & EBT_MARK_OR)
22 return !(!!(skb->mark & info->mask) ^ info->invert); 22 return !(!!(skb->mark & info->mask) ^ info->invert);
@@ -26,7 +26,7 @@ static int ebt_filter_mark(const struct sk_buff *skb,
26static int ebt_mark_check(const char *tablename, unsigned int hookmask, 26static int ebt_mark_check(const char *tablename, unsigned int hookmask,
27 const struct ebt_entry *e, void *data, unsigned int datalen) 27 const struct ebt_entry *e, void *data, unsigned int datalen)
28{ 28{
29 struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; 29 const struct ebt_mark_m_info *info = data;
30 30
31 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) 31 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info)))
32 return -EINVAL; 32 return -EINVAL;
@@ -39,8 +39,7 @@ static int ebt_mark_check(const char *tablename, unsigned int hookmask,
39 return 0; 39 return 0;
40} 40}
41 41
42static struct ebt_match filter_mark = 42static struct ebt_match filter_mark __read_mostly = {
43{
44 .name = EBT_MARK_MATCH, 43 .name = EBT_MARK_MATCH,
45 .match = ebt_filter_mark, 44 .match = ebt_filter_mark,
46 .check = ebt_mark_check, 45 .check = ebt_mark_check,
@@ -59,4 +58,5 @@ static void __exit ebt_mark_m_fini(void)
59 58
60module_init(ebt_mark_m_init); 59module_init(ebt_mark_m_init);
61module_exit(ebt_mark_m_fini); 60module_exit(ebt_mark_m_fini);
61MODULE_DESCRIPTION("Ebtables: Packet mark match");
62MODULE_LICENSE("GPL"); 62MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 4fffd70e4da7..676db32df3d1 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -18,7 +18,7 @@ static int ebt_filter_pkttype(const struct sk_buff *skb,
18 const void *data, 18 const void *data,
19 unsigned int datalen) 19 unsigned int datalen)
20{ 20{
21 struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; 21 const struct ebt_pkttype_info *info = data;
22 22
23 return (skb->pkt_type != info->pkt_type) ^ info->invert; 23 return (skb->pkt_type != info->pkt_type) ^ info->invert;
24} 24}
@@ -26,7 +26,7 @@ static int ebt_filter_pkttype(const struct sk_buff *skb,
26static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, 26static int ebt_pkttype_check(const char *tablename, unsigned int hookmask,
27 const struct ebt_entry *e, void *data, unsigned int datalen) 27 const struct ebt_entry *e, void *data, unsigned int datalen)
28{ 28{
29 struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; 29 const struct ebt_pkttype_info *info = data;
30 30
31 if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) 31 if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info)))
32 return -EINVAL; 32 return -EINVAL;
@@ -36,8 +36,7 @@ static int ebt_pkttype_check(const char *tablename, unsigned int hookmask,
36 return 0; 36 return 0;
37} 37}
38 38
39static struct ebt_match filter_pkttype = 39static struct ebt_match filter_pkttype __read_mostly = {
40{
41 .name = EBT_PKTTYPE_MATCH, 40 .name = EBT_PKTTYPE_MATCH,
42 .match = ebt_filter_pkttype, 41 .match = ebt_filter_pkttype,
43 .check = ebt_pkttype_check, 42 .check = ebt_pkttype_check,
@@ -56,4 +55,5 @@ static void __exit ebt_pkttype_fini(void)
56 55
57module_init(ebt_pkttype_init); 56module_init(ebt_pkttype_init);
58module_exit(ebt_pkttype_fini); 57module_exit(ebt_pkttype_fini);
58MODULE_DESCRIPTION("Ebtables: Link layer packet type match");
59MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 422cb834cff9..bfdf2fb60b1f 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -19,7 +19,7 @@ static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
19 const struct net_device *in, const struct net_device *out, 19 const struct net_device *in, const struct net_device *out,
20 const void *data, unsigned int datalen) 20 const void *data, unsigned int datalen)
21{ 21{
22 struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; 22 const struct ebt_redirect_info *info = data;
23 23
24 if (skb_make_writable(skb, 0)) 24 if (skb_make_writable(skb, 0))
25 return NF_DROP; 25 return NF_DROP;
@@ -36,7 +36,7 @@ static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
36static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, 36static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask,
37 const struct ebt_entry *e, void *data, unsigned int datalen) 37 const struct ebt_entry *e, void *data, unsigned int datalen)
38{ 38{
39 struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; 39 const struct ebt_redirect_info *info = data;
40 40
41 if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) 41 if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info)))
42 return -EINVAL; 42 return -EINVAL;
@@ -51,8 +51,7 @@ static int ebt_target_redirect_check(const char *tablename, unsigned int hookmas
51 return 0; 51 return 0;
52} 52}
53 53
54static struct ebt_target redirect_target = 54static struct ebt_target redirect_target __read_mostly = {
55{
56 .name = EBT_REDIRECT_TARGET, 55 .name = EBT_REDIRECT_TARGET,
57 .target = ebt_target_redirect, 56 .target = ebt_target_redirect,
58 .check = ebt_target_redirect_check, 57 .check = ebt_target_redirect_check,
@@ -71,4 +70,5 @@ static void __exit ebt_redirect_fini(void)
71 70
72module_init(ebt_redirect_init); 71module_init(ebt_redirect_init);
73module_exit(ebt_redirect_fini); 72module_exit(ebt_redirect_fini);
73MODULE_DESCRIPTION("Ebtables: Packet redirection to localhost");
74MODULE_LICENSE("GPL"); 74MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 425ac920904d..e252dabbb143 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -20,7 +20,7 @@ static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
20 const struct net_device *in, const struct net_device *out, 20 const struct net_device *in, const struct net_device *out,
21 const void *data, unsigned int datalen) 21 const void *data, unsigned int datalen)
22{ 22{
23 struct ebt_nat_info *info = (struct ebt_nat_info *) data; 23 const struct ebt_nat_info *info = data;
24 24
25 if (skb_make_writable(skb, 0)) 25 if (skb_make_writable(skb, 0))
26 return NF_DROP; 26 return NF_DROP;
@@ -28,7 +28,8 @@ static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
28 memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); 28 memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
29 if (!(info->target & NAT_ARP_BIT) && 29 if (!(info->target & NAT_ARP_BIT) &&
30 eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { 30 eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
31 struct arphdr _ah, *ap; 31 const struct arphdr *ap;
32 struct arphdr _ah;
32 33
33 ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); 34 ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
34 if (ap == NULL) 35 if (ap == NULL)
@@ -45,7 +46,7 @@ out:
45static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, 46static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
46 const struct ebt_entry *e, void *data, unsigned int datalen) 47 const struct ebt_entry *e, void *data, unsigned int datalen)
47{ 48{
48 struct ebt_nat_info *info = (struct ebt_nat_info *) data; 49 const struct ebt_nat_info *info = data;
49 int tmp; 50 int tmp;
50 51
51 if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) 52 if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
@@ -67,8 +68,7 @@ static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
67 return 0; 68 return 0;
68} 69}
69 70
70static struct ebt_target snat = 71static struct ebt_target snat __read_mostly = {
71{
72 .name = EBT_SNAT_TARGET, 72 .name = EBT_SNAT_TARGET,
73 .target = ebt_target_snat, 73 .target = ebt_target_snat,
74 .check = ebt_target_snat_check, 74 .check = ebt_target_snat_check,
@@ -87,4 +87,5 @@ static void __exit ebt_snat_fini(void)
87 87
88module_init(ebt_snat_init); 88module_init(ebt_snat_init);
89module_exit(ebt_snat_fini); 89module_exit(ebt_snat_fini);
90MODULE_DESCRIPTION("Ebtables: Source MAC address translation");
90MODULE_LICENSE("GPL"); 91MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 31b77367319c..40f36d37607d 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -40,10 +40,10 @@ struct stp_config_pdu {
40#define NR16(p) (p[0] << 8 | p[1]) 40#define NR16(p) (p[0] << 8 | p[1])
41#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) 41#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
42 42
43static int ebt_filter_config(struct ebt_stp_info *info, 43static int ebt_filter_config(const struct ebt_stp_info *info,
44 struct stp_config_pdu *stpc) 44 const struct stp_config_pdu *stpc)
45{ 45{
46 struct ebt_stp_config_info *c; 46 const struct ebt_stp_config_info *c;
47 uint16_t v16; 47 uint16_t v16;
48 uint32_t v32; 48 uint32_t v32;
49 int verdict, i; 49 int verdict, i;
@@ -122,9 +122,10 @@ static int ebt_filter_config(struct ebt_stp_info *info,
122static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, 122static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in,
123 const struct net_device *out, const void *data, unsigned int datalen) 123 const struct net_device *out, const void *data, unsigned int datalen)
124{ 124{
125 struct ebt_stp_info *info = (struct ebt_stp_info *)data; 125 const struct ebt_stp_info *info = data;
126 struct stp_header _stph, *sp; 126 const struct stp_header *sp;
127 uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; 127 struct stp_header _stph;
128 const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
128 129
129 sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); 130 sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
130 if (sp == NULL) 131 if (sp == NULL)
@@ -140,7 +141,8 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
140 141
141 if (sp->type == BPDU_TYPE_CONFIG && 142 if (sp->type == BPDU_TYPE_CONFIG &&
142 info->bitmask & EBT_STP_CONFIG_MASK) { 143 info->bitmask & EBT_STP_CONFIG_MASK) {
143 struct stp_config_pdu _stpc, *st; 144 const struct stp_config_pdu *st;
145 struct stp_config_pdu _stpc;
144 146
145 st = skb_header_pointer(skb, sizeof(_stph), 147 st = skb_header_pointer(skb, sizeof(_stph),
146 sizeof(_stpc), &_stpc); 148 sizeof(_stpc), &_stpc);
@@ -154,10 +156,10 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
154static int ebt_stp_check(const char *tablename, unsigned int hookmask, 156static int ebt_stp_check(const char *tablename, unsigned int hookmask,
155 const struct ebt_entry *e, void *data, unsigned int datalen) 157 const struct ebt_entry *e, void *data, unsigned int datalen)
156{ 158{
157 struct ebt_stp_info *info = (struct ebt_stp_info *)data; 159 const struct ebt_stp_info *info = data;
158 int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); 160 const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info));
159 uint8_t bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; 161 const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
160 uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 162 const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
161 163
162 if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || 164 if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
163 !(info->bitmask & EBT_STP_MASK)) 165 !(info->bitmask & EBT_STP_MASK))
@@ -172,8 +174,7 @@ static int ebt_stp_check(const char *tablename, unsigned int hookmask,
172 return 0; 174 return 0;
173} 175}
174 176
175static struct ebt_match filter_stp = 177static struct ebt_match filter_stp __read_mostly = {
176{
177 .name = EBT_STP_MATCH, 178 .name = EBT_STP_MATCH,
178 .match = ebt_filter_stp, 179 .match = ebt_filter_stp,
179 .check = ebt_stp_check, 180 .check = ebt_stp_check,
@@ -192,4 +193,5 @@ static void __exit ebt_stp_fini(void)
192 193
193module_init(ebt_stp_init); 194module_init(ebt_stp_init);
194module_exit(ebt_stp_fini); 195module_exit(ebt_stp_fini);
196MODULE_DESCRIPTION("Ebtables: Spanning Tree Protocol packet match");
195MODULE_LICENSE("GPL"); 197MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index e7cfd30bac75..2d4c9ef909fc 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -38,6 +38,7 @@
38#include <linux/netdevice.h> 38#include <linux/netdevice.h>
39#include <linux/netfilter_bridge/ebtables.h> 39#include <linux/netfilter_bridge/ebtables.h>
40#include <linux/netfilter_bridge/ebt_ulog.h> 40#include <linux/netfilter_bridge/ebt_ulog.h>
41#include <net/netfilter/nf_log.h>
41#include <net/sock.h> 42#include <net/sock.h>
42#include "../br_private.h" 43#include "../br_private.h"
43 44
@@ -248,7 +249,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
248 const struct net_device *in, const struct net_device *out, 249 const struct net_device *in, const struct net_device *out,
249 const void *data, unsigned int datalen) 250 const void *data, unsigned int datalen)
250{ 251{
251 struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; 252 const struct ebt_ulog_info *uloginfo = data;
252 253
253 ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); 254 ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
254} 255}
@@ -257,7 +258,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
257static int ebt_ulog_check(const char *tablename, unsigned int hookmask, 258static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
258 const struct ebt_entry *e, void *data, unsigned int datalen) 259 const struct ebt_entry *e, void *data, unsigned int datalen)
259{ 260{
260 struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; 261 struct ebt_ulog_info *uloginfo = data;
261 262
262 if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || 263 if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) ||
263 uloginfo->nlgroup > 31) 264 uloginfo->nlgroup > 31)
@@ -271,14 +272,14 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
271 return 0; 272 return 0;
272} 273}
273 274
274static struct ebt_watcher ulog = { 275static struct ebt_watcher ulog __read_mostly = {
275 .name = EBT_ULOG_WATCHER, 276 .name = EBT_ULOG_WATCHER,
276 .watcher = ebt_ulog, 277 .watcher = ebt_ulog,
277 .check = ebt_ulog_check, 278 .check = ebt_ulog_check,
278 .me = THIS_MODULE, 279 .me = THIS_MODULE,
279}; 280};
280 281
281static struct nf_logger ebt_ulog_logger = { 282static const struct nf_logger ebt_ulog_logger = {
282 .name = EBT_ULOG_WATCHER, 283 .name = EBT_ULOG_WATCHER,
283 .logfn = &ebt_log_packet, 284 .logfn = &ebt_log_packet,
284 .me = THIS_MODULE, 285 .me = THIS_MODULE,
@@ -306,7 +307,7 @@ static int __init ebt_ulog_init(void)
306 if (!ebtulognl) 307 if (!ebtulognl)
307 ret = -ENOMEM; 308 ret = -ENOMEM;
308 else if ((ret = ebt_register_watcher(&ulog))) 309 else if ((ret = ebt_register_watcher(&ulog)))
309 sock_release(ebtulognl->sk_socket); 310 netlink_kernel_release(ebtulognl);
310 311
311 if (ret == 0) 312 if (ret == 0)
312 nf_log_register(PF_BRIDGE, &ebt_ulog_logger); 313 nf_log_register(PF_BRIDGE, &ebt_ulog_logger);
@@ -332,12 +333,11 @@ static void __exit ebt_ulog_fini(void)
332 } 333 }
333 spin_unlock_bh(&ub->lock); 334 spin_unlock_bh(&ub->lock);
334 } 335 }
335 sock_release(ebtulognl->sk_socket); 336 netlink_kernel_release(ebtulognl);
336} 337}
337 338
338module_init(ebt_ulog_init); 339module_init(ebt_ulog_init);
339module_exit(ebt_ulog_fini); 340module_exit(ebt_ulog_fini);
340MODULE_LICENSE("GPL"); 341MODULE_LICENSE("GPL");
341MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 342MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
342MODULE_DESCRIPTION("ebtables userspace logging module for bridged Ethernet" 343MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG");
343 " frames");
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index a43c697d3d73..ab60b0dade80 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -31,15 +31,12 @@ static int debug;
31module_param(debug, int, 0); 31module_param(debug, int, 0);
32MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); 32MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages");
33MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); 33MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>");
34MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v" 34MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match");
35 MODULE_VERS);
36MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
37 36
38 37
39#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) 38#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
40#define INV_FLAG(_inv_flag_) (info->invflags & _inv_flag_) ? "!" : ""
41#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ 39#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
42#define SET_BITMASK(_BIT_MASK_) info->bitmask |= _BIT_MASK_
43#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} 40#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;}
44 41
45static int 42static int
@@ -48,8 +45,9 @@ ebt_filter_vlan(const struct sk_buff *skb,
48 const struct net_device *out, 45 const struct net_device *out,
49 const void *data, unsigned int datalen) 46 const void *data, unsigned int datalen)
50{ 47{
51 struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; 48 const struct ebt_vlan_info *info = data;
52 struct vlan_hdr _frame, *fp; 49 const struct vlan_hdr *fp;
50 struct vlan_hdr _frame;
53 51
54 unsigned short TCI; /* Whole TCI, given from parsed frame */ 52 unsigned short TCI; /* Whole TCI, given from parsed frame */
55 unsigned short id; /* VLAN ID, given from frame TCI */ 53 unsigned short id; /* VLAN ID, given from frame TCI */
@@ -93,7 +91,7 @@ ebt_check_vlan(const char *tablename,
93 unsigned int hooknr, 91 unsigned int hooknr,
94 const struct ebt_entry *e, void *data, unsigned int datalen) 92 const struct ebt_entry *e, void *data, unsigned int datalen)
95{ 93{
96 struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; 94 struct ebt_vlan_info *info = data;
97 95
98 /* Parameters buffer overflow check */ 96 /* Parameters buffer overflow check */
99 if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { 97 if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) {
@@ -171,7 +169,7 @@ ebt_check_vlan(const char *tablename,
171 return 0; 169 return 0;
172} 170}
173 171
174static struct ebt_match filter_vlan = { 172static struct ebt_match filter_vlan __read_mostly = {
175 .name = EBT_VLAN_MATCH, 173 .name = EBT_VLAN_MATCH,
176 .match = ebt_filter_vlan, 174 .match = ebt_filter_vlan,
177 .check = ebt_check_vlan, 175 .check = ebt_check_vlan,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index e44519ebf1d2..be6f18681053 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -70,13 +70,13 @@ static int __init ebtable_broute_init(void)
70 if (ret < 0) 70 if (ret < 0)
71 return ret; 71 return ret;
72 /* see br_input.c */ 72 /* see br_input.c */
73 br_should_route_hook = ebt_broute; 73 rcu_assign_pointer(br_should_route_hook, ebt_broute);
74 return ret; 74 return ret;
75} 75}
76 76
77static void __exit ebtable_broute_fini(void) 77static void __exit ebtable_broute_fini(void)
78{ 78{
79 br_should_route_hook = NULL; 79 rcu_assign_pointer(br_should_route_hook, NULL);
80 synchronize_net(); 80 synchronize_net();
81 ebt_unregister_table(&broute_table); 81 ebt_unregister_table(&broute_table);
82} 82}
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 210493f99bc4..fb810908732f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -67,7 +67,7 @@ ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
67 return ebt_do_table(hook, skb, in, out, &frame_filter); 67 return ebt_do_table(hook, skb, in, out, &frame_filter);
68} 68}
69 69
70static struct nf_hook_ops ebt_ops_filter[] = { 70static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
71 { 71 {
72 .hook = ebt_hook, 72 .hook = ebt_hook,
73 .owner = THIS_MODULE, 73 .owner = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 3e58c2e5ee21..bc712730c54a 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -74,7 +74,7 @@ ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
74 return ebt_do_table(hook, skb, in, out, &frame_nat); 74 return ebt_do_table(hook, skb, in, out, &frame_nat);
75} 75}
76 76
77static struct nf_hook_ops ebt_ops_nat[] = { 77static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
78 { 78 {
79 .hook = ebt_nat_dst, 79 .hook = ebt_nat_dst,
80 .owner = THIS_MODULE, 80 .owner = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 817169e718c1..32afff859e4a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -15,8 +15,6 @@
15 * 2 of the License, or (at your option) any later version. 15 * 2 of the License, or (at your option) any later version.
16 */ 16 */
17 17
18/* used for print_string */
19#include <linux/tty.h>
20 18
21#include <linux/kmod.h> 19#include <linux/kmod.h>
22#include <linux/module.h> 20#include <linux/module.h>
diff --git a/net/can/Kconfig b/net/can/Kconfig
new file mode 100644
index 000000000000..89395b2c8bca
--- /dev/null
+++ b/net/can/Kconfig
@@ -0,0 +1,44 @@
1#
2# Controller Area Network (CAN) network layer core configuration
3#
4
5menuconfig CAN
6 depends on NET
7 tristate "CAN bus subsystem support"
8 ---help---
9 Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
10 communications protocol which was developed by Bosch in
11 1991, mainly for automotive, but now widely used in marine
12 (NMEA2000), industrial, and medical applications.
13 More information on the CAN network protocol family PF_CAN
14 is contained in <Documentation/networking/can.txt>.
15
16 If you want CAN support you should say Y here and also to the
17 specific driver for your controller(s) below.
18
19config CAN_RAW
20 tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
21 depends on CAN
22 default N
23 ---help---
24 The raw CAN protocol option offers access to the CAN bus via
25 the BSD socket API. You probably want to use the raw socket in
26 most cases where no higher level protocol is being used. The raw
27 socket has several filter options e.g. ID masking / error frames.
28 To receive/send raw CAN messages, use AF_CAN with protocol CAN_RAW.
29
30config CAN_BCM
31 tristate "Broadcast Manager CAN Protocol (with content filtering)"
32 depends on CAN
33 default N
34 ---help---
35 The Broadcast Manager offers content filtering, timeout monitoring,
36 sending of RTR frames, and cyclic CAN messages without permanent user
37 interaction. The BCM can be 'programmed' via the BSD socket API and
38 informs you on demand e.g. only on content updates / timeouts.
39 You probably want to use the bcm socket in most cases where cyclic
40 CAN messages are used on the bus (e.g. in automotive environments).
41 To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
42
43
44source "drivers/net/can/Kconfig"
diff --git a/net/can/Makefile b/net/can/Makefile
new file mode 100644
index 000000000000..9cd3c4b3abda
--- /dev/null
+++ b/net/can/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the Linux Controller Area Network core.
3#
4
5obj-$(CONFIG_CAN) += can.o
6can-objs := af_can.o proc.o
7
8obj-$(CONFIG_CAN_RAW) += can-raw.o
9can-raw-objs := raw.o
10
11obj-$(CONFIG_CAN_BCM) += can-bcm.o
12can-bcm-objs := bcm.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
new file mode 100644
index 000000000000..5158e886630f
--- /dev/null
+++ b/net/can/af_can.c
@@ -0,0 +1,861 @@
1/*
2 * af_can.c - Protocol family CAN core module
3 * (used by different CAN protocol modules)
4 *
5 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of Volkswagen nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * Alternatively, provided that this notice is retained in full, this
21 * software may be distributed under the terms of the GNU General
22 * Public License ("GPL") version 2, in which case the provisions of the
23 * GPL apply INSTEAD OF those given above.
24 *
25 * The provided data structures and external interfaces from this code
26 * are not restricted to be used by modules with a GPL compatible license.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
39 * DAMAGE.
40 *
41 * Send feedback to <socketcan-users@lists.berlios.de>
42 *
43 */
44
45#include <linux/module.h>
46#include <linux/init.h>
47#include <linux/kmod.h>
48#include <linux/slab.h>
49#include <linux/list.h>
50#include <linux/spinlock.h>
51#include <linux/rcupdate.h>
52#include <linux/uaccess.h>
53#include <linux/net.h>
54#include <linux/netdevice.h>
55#include <linux/socket.h>
56#include <linux/if_ether.h>
57#include <linux/if_arp.h>
58#include <linux/skbuff.h>
59#include <linux/can.h>
60#include <linux/can/core.h>
61#include <net/net_namespace.h>
62#include <net/sock.h>
63
64#include "af_can.h"
65
66static __initdata const char banner[] = KERN_INFO
67 "can: controller area network core (" CAN_VERSION_STRING ")\n";
68
69MODULE_DESCRIPTION("Controller Area Network PF_CAN core");
70MODULE_LICENSE("Dual BSD/GPL");
71MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
72 "Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
73
74MODULE_ALIAS_NETPROTO(PF_CAN);
75
76static int stats_timer __read_mostly = 1;
77module_param(stats_timer, int, S_IRUGO);
78MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
79
80HLIST_HEAD(can_rx_dev_list);
81static struct dev_rcv_lists can_rx_alldev_list;
82static DEFINE_SPINLOCK(can_rcvlists_lock);
83
84static struct kmem_cache *rcv_cache __read_mostly;
85
86/* table of registered CAN protocols */
87static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
88static DEFINE_SPINLOCK(proto_tab_lock);
89
90struct timer_list can_stattimer; /* timer for statistics update */
91struct s_stats can_stats; /* packet statistics */
92struct s_pstats can_pstats; /* receive list statistics */
93
94/*
95 * af_can socket functions
96 */
97
98static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
99{
100 struct sock *sk = sock->sk;
101
102 switch (cmd) {
103
104 case SIOCGSTAMP:
105 return sock_get_timestamp(sk, (struct timeval __user *)arg);
106
107 default:
108 return -ENOIOCTLCMD;
109 }
110}
111
112static void can_sock_destruct(struct sock *sk)
113{
114 skb_queue_purge(&sk->sk_receive_queue);
115}
116
117static int can_create(struct net *net, struct socket *sock, int protocol)
118{
119 struct sock *sk;
120 struct can_proto *cp;
121 char module_name[sizeof("can-proto-000")];
122 int err = 0;
123
124 sock->state = SS_UNCONNECTED;
125
126 if (protocol < 0 || protocol >= CAN_NPROTO)
127 return -EINVAL;
128
129 if (net != &init_net)
130 return -EAFNOSUPPORT;
131
132 /* try to load protocol module, when CONFIG_KMOD is defined */
133 if (!proto_tab[protocol]) {
134 sprintf(module_name, "can-proto-%d", protocol);
135 err = request_module(module_name);
136
137 /*
138 * In case of error we only print a message but don't
139 * return the error code immediately. Below we will
140 * return -EPROTONOSUPPORT
141 */
142 if (err == -ENOSYS) {
143 if (printk_ratelimit())
144 printk(KERN_INFO "can: request_module(%s)"
145 " not implemented.\n", module_name);
146 } else if (err) {
147 if (printk_ratelimit())
148 printk(KERN_ERR "can: request_module(%s)"
149 " failed.\n", module_name);
150 }
151 }
152
153 spin_lock(&proto_tab_lock);
154 cp = proto_tab[protocol];
155 if (cp && !try_module_get(cp->prot->owner))
156 cp = NULL;
157 spin_unlock(&proto_tab_lock);
158
159 /* check for available protocol and correct usage */
160
161 if (!cp)
162 return -EPROTONOSUPPORT;
163
164 if (cp->type != sock->type) {
165 err = -EPROTONOSUPPORT;
166 goto errout;
167 }
168
169 if (cp->capability >= 0 && !capable(cp->capability)) {
170 err = -EPERM;
171 goto errout;
172 }
173
174 sock->ops = cp->ops;
175
176 sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
177 if (!sk) {
178 err = -ENOMEM;
179 goto errout;
180 }
181
182 sock_init_data(sock, sk);
183 sk->sk_destruct = can_sock_destruct;
184
185 if (sk->sk_prot->init)
186 err = sk->sk_prot->init(sk);
187
188 if (err) {
189 /* release sk on errors */
190 sock_orphan(sk);
191 sock_put(sk);
192 }
193
194 errout:
195 module_put(cp->prot->owner);
196 return err;
197}
198
199/*
200 * af_can tx path
201 */
202
203/**
204 * can_send - transmit a CAN frame (optional with local loopback)
205 * @skb: pointer to socket buffer with CAN frame in data section
206 * @loop: loopback for listeners on local CAN sockets (recommended default!)
207 *
208 * Return:
209 * 0 on success
210 * -ENETDOWN when the selected interface is down
211 * -ENOBUFS on full driver queue (see net_xmit_errno())
212 * -ENOMEM when local loopback failed at calling skb_clone()
213 * -EPERM when trying to send on a non-CAN interface
214 */
215int can_send(struct sk_buff *skb, int loop)
216{
217 int err;
218
219 if (skb->dev->type != ARPHRD_CAN) {
220 kfree_skb(skb);
221 return -EPERM;
222 }
223
224 if (!(skb->dev->flags & IFF_UP)) {
225 kfree_skb(skb);
226 return -ENETDOWN;
227 }
228
229 skb->protocol = htons(ETH_P_CAN);
230 skb_reset_network_header(skb);
231 skb_reset_transport_header(skb);
232
233 if (loop) {
234 /* local loopback of sent CAN frames */
235
236 /* indication for the CAN driver: do loopback */
237 skb->pkt_type = PACKET_LOOPBACK;
238
239 /*
240 * The reference to the originating sock may be required
241 * by the receiving socket to check whether the frame is
242 * its own. Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS
243 * Therefore we have to ensure that skb->sk remains the
244 * reference to the originating sock by restoring skb->sk
245 * after each skb_clone() or skb_orphan() usage.
246 */
247
248 if (!(skb->dev->flags & IFF_ECHO)) {
249 /*
250 * If the interface is not capable to do loopback
251 * itself, we do it here.
252 */
253 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
254
255 if (!newskb) {
256 kfree_skb(skb);
257 return -ENOMEM;
258 }
259
260 newskb->sk = skb->sk;
261 newskb->ip_summed = CHECKSUM_UNNECESSARY;
262 newskb->pkt_type = PACKET_BROADCAST;
263 netif_rx(newskb);
264 }
265 } else {
266 /* indication for the CAN driver: no loopback required */
267 skb->pkt_type = PACKET_HOST;
268 }
269
270 /* send to netdevice */
271 err = dev_queue_xmit(skb);
272 if (err > 0)
273 err = net_xmit_errno(err);
274
275 /* update statistics */
276 can_stats.tx_frames++;
277 can_stats.tx_frames_delta++;
278
279 return err;
280}
281EXPORT_SYMBOL(can_send);
282
283/*
284 * af_can rx path
285 */
286
287static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
288{
289 struct dev_rcv_lists *d = NULL;
290 struct hlist_node *n;
291
292 /*
293 * find receive list for this device
294 *
295 * The hlist_for_each_entry*() macros curse through the list
296 * using the pointer variable n and set d to the containing
297 * struct in each list iteration. Therefore, after list
298 * iteration, d is unmodified when the list is empty, and it
299 * points to last list element, when the list is non-empty
300 * but no match in the loop body is found. I.e. d is *not*
301 * NULL when no match is found. We can, however, use the
302 * cursor variable n to decide if a match was found.
303 */
304
305 hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
306 if (d->dev == dev)
307 break;
308 }
309
310 return n ? d : NULL;
311}
312
313static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
314 struct dev_rcv_lists *d)
315{
316 canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
317
318 /* filter error frames */
319 if (*mask & CAN_ERR_FLAG) {
320 /* clear CAN_ERR_FLAG in list entry */
321 *mask &= CAN_ERR_MASK;
322 return &d->rx[RX_ERR];
323 }
324
325 /* ensure valid values in can_mask */
326 if (*mask & CAN_EFF_FLAG)
327 *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG);
328 else
329 *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG);
330
331 /* reduce condition testing at receive time */
332 *can_id &= *mask;
333
334 /* inverse can_id/can_mask filter */
335 if (inv)
336 return &d->rx[RX_INV];
337
338 /* mask == 0 => no condition testing at receive time */
339 if (!(*mask))
340 return &d->rx[RX_ALL];
341
342 /* use extra filterset for the subscription of exactly *ONE* can_id */
343 if (*can_id & CAN_EFF_FLAG) {
344 if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) {
345 /* RFC: a use-case for hash-tables in the future? */
346 return &d->rx[RX_EFF];
347 }
348 } else {
349 if (*mask == CAN_SFF_MASK)
350 return &d->rx_sff[*can_id];
351 }
352
353 /* default: filter via can_id/can_mask */
354 return &d->rx[RX_FIL];
355}
356
357/**
358 * can_rx_register - subscribe CAN frames from a specific interface
359 * @dev: pointer to netdevice (NULL => subcribe from 'all' CAN devices list)
360 * @can_id: CAN identifier (see description)
361 * @mask: CAN mask (see description)
362 * @func: callback function on filter match
363 * @data: returned parameter for callback function
364 * @ident: string for calling module indentification
365 *
366 * Description:
367 * Invokes the callback function with the received sk_buff and the given
368 * parameter 'data' on a matching receive filter. A filter matches, when
369 *
370 * <received_can_id> & mask == can_id & mask
371 *
372 * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can
373 * filter for error frames (CAN_ERR_FLAG bit set in mask).
374 *
375 * Return:
376 * 0 on success
377 * -ENOMEM on missing cache mem to create subscription entry
378 * -ENODEV unknown device
379 */
380int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
381 void (*func)(struct sk_buff *, void *), void *data,
382 char *ident)
383{
384 struct receiver *r;
385 struct hlist_head *rl;
386 struct dev_rcv_lists *d;
387 int err = 0;
388
389 /* insert new receiver (dev,canid,mask) -> (func,data) */
390
391 r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
392 if (!r)
393 return -ENOMEM;
394
395 spin_lock(&can_rcvlists_lock);
396
397 d = find_dev_rcv_lists(dev);
398 if (d) {
399 rl = find_rcv_list(&can_id, &mask, d);
400
401 r->can_id = can_id;
402 r->mask = mask;
403 r->matches = 0;
404 r->func = func;
405 r->data = data;
406 r->ident = ident;
407
408 hlist_add_head_rcu(&r->list, rl);
409 d->entries++;
410
411 can_pstats.rcv_entries++;
412 if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
413 can_pstats.rcv_entries_max = can_pstats.rcv_entries;
414 } else {
415 kmem_cache_free(rcv_cache, r);
416 err = -ENODEV;
417 }
418
419 spin_unlock(&can_rcvlists_lock);
420
421 return err;
422}
423EXPORT_SYMBOL(can_rx_register);
424
425/*
426 * can_rx_delete_device - rcu callback for dev_rcv_lists structure removal
427 */
428static void can_rx_delete_device(struct rcu_head *rp)
429{
430 struct dev_rcv_lists *d = container_of(rp, struct dev_rcv_lists, rcu);
431
432 kfree(d);
433}
434
435/*
436 * can_rx_delete_receiver - rcu callback for single receiver entry removal
437 */
438static void can_rx_delete_receiver(struct rcu_head *rp)
439{
440 struct receiver *r = container_of(rp, struct receiver, rcu);
441
442 kmem_cache_free(rcv_cache, r);
443}
444
445/**
446 * can_rx_unregister - unsubscribe CAN frames from a specific interface
447 * @dev: pointer to netdevice (NULL => unsubcribe from 'all' CAN devices list)
448 * @can_id: CAN identifier
449 * @mask: CAN mask
450 * @func: callback function on filter match
451 * @data: returned parameter for callback function
452 *
453 * Description:
454 * Removes subscription entry depending on given (subscription) values.
455 */
456void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
457 void (*func)(struct sk_buff *, void *), void *data)
458{
459 struct receiver *r = NULL;
460 struct hlist_head *rl;
461 struct hlist_node *next;
462 struct dev_rcv_lists *d;
463
464 spin_lock(&can_rcvlists_lock);
465
466 d = find_dev_rcv_lists(dev);
467 if (!d) {
468 printk(KERN_ERR "BUG: receive list not found for "
469 "dev %s, id %03X, mask %03X\n",
470 DNAME(dev), can_id, mask);
471 goto out;
472 }
473
474 rl = find_rcv_list(&can_id, &mask, d);
475
476 /*
477 * Search the receiver list for the item to delete. This should
478 * exist, since no receiver may be unregistered that hasn't
479 * been registered before.
480 */
481
482 hlist_for_each_entry_rcu(r, next, rl, list) {
483 if (r->can_id == can_id && r->mask == mask
484 && r->func == func && r->data == data)
485 break;
486 }
487
488 /*
489 * Check for bugs in CAN protocol implementations:
490 * If no matching list item was found, the list cursor variable next
491 * will be NULL, while r will point to the last item of the list.
492 */
493
494 if (!next) {
495 printk(KERN_ERR "BUG: receive list entry not found for "
496 "dev %s, id %03X, mask %03X\n",
497 DNAME(dev), can_id, mask);
498 r = NULL;
499 d = NULL;
500 goto out;
501 }
502
503 hlist_del_rcu(&r->list);
504 d->entries--;
505
506 if (can_pstats.rcv_entries > 0)
507 can_pstats.rcv_entries--;
508
509 /* remove device structure requested by NETDEV_UNREGISTER */
510 if (d->remove_on_zero_entries && !d->entries)
511 hlist_del_rcu(&d->list);
512 else
513 d = NULL;
514
515 out:
516 spin_unlock(&can_rcvlists_lock);
517
518 /* schedule the receiver item for deletion */
519 if (r)
520 call_rcu(&r->rcu, can_rx_delete_receiver);
521
522 /* schedule the device structure for deletion */
523 if (d)
524 call_rcu(&d->rcu, can_rx_delete_device);
525}
526EXPORT_SYMBOL(can_rx_unregister);
527
528static inline void deliver(struct sk_buff *skb, struct receiver *r)
529{
530 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
531
532 if (clone) {
533 clone->sk = skb->sk;
534 r->func(clone, r->data);
535 r->matches++;
536 }
537}
538
539static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
540{
541 struct receiver *r;
542 struct hlist_node *n;
543 int matches = 0;
544 struct can_frame *cf = (struct can_frame *)skb->data;
545 canid_t can_id = cf->can_id;
546
547 if (d->entries == 0)
548 return 0;
549
550 if (can_id & CAN_ERR_FLAG) {
551 /* check for error frame entries only */
552 hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) {
553 if (can_id & r->mask) {
554 deliver(skb, r);
555 matches++;
556 }
557 }
558 return matches;
559 }
560
561 /* check for unfiltered entries */
562 hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) {
563 deliver(skb, r);
564 matches++;
565 }
566
567 /* check for can_id/mask entries */
568 hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) {
569 if ((can_id & r->mask) == r->can_id) {
570 deliver(skb, r);
571 matches++;
572 }
573 }
574
575 /* check for inverted can_id/mask entries */
576 hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) {
577 if ((can_id & r->mask) != r->can_id) {
578 deliver(skb, r);
579 matches++;
580 }
581 }
582
583 /* check CAN_ID specific entries */
584 if (can_id & CAN_EFF_FLAG) {
585 hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) {
586 if (r->can_id == can_id) {
587 deliver(skb, r);
588 matches++;
589 }
590 }
591 } else {
592 can_id &= CAN_SFF_MASK;
593 hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) {
594 deliver(skb, r);
595 matches++;
596 }
597 }
598
599 return matches;
600}
601
602static int can_rcv(struct sk_buff *skb, struct net_device *dev,
603 struct packet_type *pt, struct net_device *orig_dev)
604{
605 struct dev_rcv_lists *d;
606 int matches;
607
608 if (dev->type != ARPHRD_CAN || dev->nd_net != &init_net) {
609 kfree_skb(skb);
610 return 0;
611 }
612
613 /* update statistics */
614 can_stats.rx_frames++;
615 can_stats.rx_frames_delta++;
616
617 rcu_read_lock();
618
619 /* deliver the packet to sockets listening on all devices */
620 matches = can_rcv_filter(&can_rx_alldev_list, skb);
621
622 /* find receive list for this device */
623 d = find_dev_rcv_lists(dev);
624 if (d)
625 matches += can_rcv_filter(d, skb);
626
627 rcu_read_unlock();
628
629 /* free the skbuff allocated by the netdevice driver */
630 kfree_skb(skb);
631
632 if (matches > 0) {
633 can_stats.matches++;
634 can_stats.matches_delta++;
635 }
636
637 return 0;
638}
639
640/*
641 * af_can protocol functions
642 */
643
644/**
645 * can_proto_register - register CAN transport protocol
646 * @cp: pointer to CAN protocol structure
647 *
648 * Return:
649 * 0 on success
650 * -EINVAL invalid (out of range) protocol number
651 * -EBUSY protocol already in use
652 * -ENOBUF if proto_register() fails
653 */
654int can_proto_register(struct can_proto *cp)
655{
656 int proto = cp->protocol;
657 int err = 0;
658
659 if (proto < 0 || proto >= CAN_NPROTO) {
660 printk(KERN_ERR "can: protocol number %d out of range\n",
661 proto);
662 return -EINVAL;
663 }
664
665 spin_lock(&proto_tab_lock);
666 if (proto_tab[proto]) {
667 printk(KERN_ERR "can: protocol %d already registered\n",
668 proto);
669 err = -EBUSY;
670 goto errout;
671 }
672
673 err = proto_register(cp->prot, 0);
674 if (err < 0)
675 goto errout;
676
677 proto_tab[proto] = cp;
678
679 /* use generic ioctl function if the module doesn't bring its own */
680 if (!cp->ops->ioctl)
681 cp->ops->ioctl = can_ioctl;
682
683 errout:
684 spin_unlock(&proto_tab_lock);
685
686 return err;
687}
688EXPORT_SYMBOL(can_proto_register);
689
690/**
691 * can_proto_unregister - unregister CAN transport protocol
692 * @cp: pointer to CAN protocol structure
693 */
694void can_proto_unregister(struct can_proto *cp)
695{
696 int proto = cp->protocol;
697
698 spin_lock(&proto_tab_lock);
699 if (!proto_tab[proto]) {
700 printk(KERN_ERR "BUG: can: protocol %d is not registered\n",
701 proto);
702 }
703 proto_unregister(cp->prot);
704 proto_tab[proto] = NULL;
705 spin_unlock(&proto_tab_lock);
706}
707EXPORT_SYMBOL(can_proto_unregister);
708
709/*
710 * af_can notifier to create/remove CAN netdevice specific structs
711 */
712static int can_notifier(struct notifier_block *nb, unsigned long msg,
713 void *data)
714{
715 struct net_device *dev = (struct net_device *)data;
716 struct dev_rcv_lists *d;
717
718 if (dev->nd_net != &init_net)
719 return NOTIFY_DONE;
720
721 if (dev->type != ARPHRD_CAN)
722 return NOTIFY_DONE;
723
724 switch (msg) {
725
726 case NETDEV_REGISTER:
727
728 /*
729 * create new dev_rcv_lists for this device
730 *
731 * N.B. zeroing the struct is the correct initialization
732 * for the embedded hlist_head structs.
733 * Another list type, e.g. list_head, would require
734 * explicit initialization.
735 */
736
737 d = kzalloc(sizeof(*d), GFP_KERNEL);
738 if (!d) {
739 printk(KERN_ERR
740 "can: allocation of receive list failed\n");
741 return NOTIFY_DONE;
742 }
743 d->dev = dev;
744
745 spin_lock(&can_rcvlists_lock);
746 hlist_add_head_rcu(&d->list, &can_rx_dev_list);
747 spin_unlock(&can_rcvlists_lock);
748
749 break;
750
751 case NETDEV_UNREGISTER:
752 spin_lock(&can_rcvlists_lock);
753
754 d = find_dev_rcv_lists(dev);
755 if (d) {
756 if (d->entries) {
757 d->remove_on_zero_entries = 1;
758 d = NULL;
759 } else
760 hlist_del_rcu(&d->list);
761 } else
762 printk(KERN_ERR "can: notifier: receive list not "
763 "found for dev %s\n", dev->name);
764
765 spin_unlock(&can_rcvlists_lock);
766
767 if (d)
768 call_rcu(&d->rcu, can_rx_delete_device);
769
770 break;
771 }
772
773 return NOTIFY_DONE;
774}
775
776/*
777 * af_can module init/exit functions
778 */
779
780static struct packet_type can_packet __read_mostly = {
781 .type = __constant_htons(ETH_P_CAN),
782 .dev = NULL,
783 .func = can_rcv,
784};
785
786static struct net_proto_family can_family_ops __read_mostly = {
787 .family = PF_CAN,
788 .create = can_create,
789 .owner = THIS_MODULE,
790};
791
792/* notifier block for netdevice event */
793static struct notifier_block can_netdev_notifier __read_mostly = {
794 .notifier_call = can_notifier,
795};
796
797static __init int can_init(void)
798{
799 printk(banner);
800
801 rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
802 0, 0, NULL);
803 if (!rcv_cache)
804 return -ENOMEM;
805
806 /*
807 * Insert can_rx_alldev_list for reception on all devices.
808 * This struct is zero initialized which is correct for the
809 * embedded hlist heads, the dev pointer, and the entries counter.
810 */
811
812 spin_lock(&can_rcvlists_lock);
813 hlist_add_head_rcu(&can_rx_alldev_list.list, &can_rx_dev_list);
814 spin_unlock(&can_rcvlists_lock);
815
816 if (stats_timer) {
817 /* the statistics are updated every second (timer triggered) */
818 setup_timer(&can_stattimer, can_stat_update, 0);
819 mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
820 } else
821 can_stattimer.function = NULL;
822
823 can_init_proc();
824
825 /* protocol register */
826 sock_register(&can_family_ops);
827 register_netdevice_notifier(&can_netdev_notifier);
828 dev_add_pack(&can_packet);
829
830 return 0;
831}
832
833static __exit void can_exit(void)
834{
835 struct dev_rcv_lists *d;
836 struct hlist_node *n, *next;
837
838 if (stats_timer)
839 del_timer(&can_stattimer);
840
841 can_remove_proc();
842
843 /* protocol unregister */
844 dev_remove_pack(&can_packet);
845 unregister_netdevice_notifier(&can_netdev_notifier);
846 sock_unregister(PF_CAN);
847
848 /* remove can_rx_dev_list */
849 spin_lock(&can_rcvlists_lock);
850 hlist_del(&can_rx_alldev_list.list);
851 hlist_for_each_entry_safe(d, n, next, &can_rx_dev_list, list) {
852 hlist_del(&d->list);
853 kfree(d);
854 }
855 spin_unlock(&can_rcvlists_lock);
856
857 kmem_cache_destroy(rcv_cache);
858}
859
860module_init(can_init);
861module_exit(can_exit);
diff --git a/net/can/af_can.h b/net/can/af_can.h
new file mode 100644
index 000000000000..18f91e37cc30
--- /dev/null
+++ b/net/can/af_can.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Volkswagen nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * Alternatively, provided that this notice is retained in full, this
18 * software may be distributed under the terms of the GNU General
19 * Public License ("GPL") version 2, in which case the provisions of the
20 * GPL apply INSTEAD OF those given above.
21 *
22 * The provided data structures and external interfaces from this code
23 * are not restricted to be used by modules with a GPL compatible license.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
36 * DAMAGE.
37 *
38 * Send feedback to <socketcan-users@lists.berlios.de>
39 *
40 */
41
42#ifndef AF_CAN_H
43#define AF_CAN_H
44
45#include <linux/skbuff.h>
46#include <linux/netdevice.h>
47#include <linux/list.h>
48#include <linux/rcupdate.h>
49#include <linux/can.h>
50
51/* af_can rx dispatcher structures */
52
53struct receiver {
54 struct hlist_node list;
55 struct rcu_head rcu;
56 canid_t can_id;
57 canid_t mask;
58 unsigned long matches;
59 void (*func)(struct sk_buff *, void *);
60 void *data;
61 char *ident;
62};
63
64enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX };
65
66struct dev_rcv_lists {
67 struct hlist_node list;
68 struct rcu_head rcu;
69 struct net_device *dev;
70 struct hlist_head rx[RX_MAX];
71 struct hlist_head rx_sff[0x800];
72 int remove_on_zero_entries;
73 int entries;
74};
75
76/* statistic structures */
77
78/* can be reset e.g. by can_init_stats() */
79struct s_stats {
80 unsigned long jiffies_init;
81
82 unsigned long rx_frames;
83 unsigned long tx_frames;
84 unsigned long matches;
85
86 unsigned long total_rx_rate;
87 unsigned long total_tx_rate;
88 unsigned long total_rx_match_ratio;
89
90 unsigned long current_rx_rate;
91 unsigned long current_tx_rate;
92 unsigned long current_rx_match_ratio;
93
94 unsigned long max_rx_rate;
95 unsigned long max_tx_rate;
96 unsigned long max_rx_match_ratio;
97
98 unsigned long rx_frames_delta;
99 unsigned long tx_frames_delta;
100 unsigned long matches_delta;
101};
102
103/* persistent statistics */
104struct s_pstats {
105 unsigned long stats_reset;
106 unsigned long user_reset;
107 unsigned long rcv_entries;
108 unsigned long rcv_entries_max;
109};
110
111/* function prototypes for the CAN networklayer procfs (proc.c) */
112extern void can_init_proc(void);
113extern void can_remove_proc(void);
114extern void can_stat_update(unsigned long data);
115
116/* structures and variables from af_can.c needed in proc.c for reading */
117extern struct timer_list can_stattimer; /* timer for statistics update */
118extern struct s_stats can_stats; /* packet statistics */
119extern struct s_pstats can_pstats; /* receive list statistics */
120extern struct hlist_head can_rx_dev_list; /* rx dispatcher structures */
121
122#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
new file mode 100644
index 000000000000..bd4282dae754
--- /dev/null
+++ b/net/can/bcm.c
@@ -0,0 +1,1561 @@
1/*
2 * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/init.h>
46#include <linux/list.h>
47#include <linux/proc_fs.h>
48#include <linux/uio.h>
49#include <linux/net.h>
50#include <linux/netdevice.h>
51#include <linux/socket.h>
52#include <linux/if_arp.h>
53#include <linux/skbuff.h>
54#include <linux/can.h>
55#include <linux/can/core.h>
56#include <linux/can/bcm.h>
57#include <net/sock.h>
58#include <net/net_namespace.h>
59
60/* use of last_frames[index].can_dlc */
61#define RX_RECV 0x40 /* received data for this element */
62#define RX_THR 0x80 /* element not been sent due to throttle feature */
63#define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */
64
65/* get best masking value for can_rx_register() for a given single can_id */
66#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \
67 (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK))
68
69#define CAN_BCM_VERSION CAN_VERSION
70static __initdata const char banner[] = KERN_INFO
71 "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n";
72
73MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
74MODULE_LICENSE("Dual BSD/GPL");
75MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
76
/* easy access to can_frame payload */
static inline u64 GET_U64(const struct can_frame *cp)
{
	/*
	 * Interpret the 8 data bytes as one u64 for fast masked comparison
	 * of frame content (used by the rx filter code below).
	 * NOTE(review): assumes can_frame.data is sufficiently aligned for
	 * a u64 load on all supported architectures - verify.
	 */
	return *(u64 *)cp->data;
}
82
/* one broadcast manager operation: a cyclic tx job or a rx filter/timeout job */
struct bcm_op {
	struct list_head list;		/* linkage in bcm_sock tx_ops/rx_ops */
	int ifindex;			/* bound interface (0 = any device) */
	canid_t can_id;			/* CAN-ID this op sends / listens to */
	int flags;			/* BCM flags from bcm_msg_head */
	unsigned long j_ival1, j_ival2, j_lastmsg; /* intervals / last msg in jiffies */
	unsigned long frames_abs, frames_filtered; /* statistics counters */
	struct timer_list timer, thrtimer; /* interval timer & rx throttle timer */
	struct timeval ival1, ival2;	/* intervals as supplied by the user */
	ktime_t rx_stamp;		/* timestamp of last matched rx frame */
	int rx_ifindex;			/* ifindex the last frame arrived on */
	int count;			/* remaining transmissions in ival1 phase */
	int nframes;			/* number of elements in frames[] */
	int currframe;			/* next frames[] index to transmit */
	struct can_frame *frames;	/* (multiplex) frame content */
	struct can_frame *last_frames;	/* last received content (rx ops only) */
	struct can_frame sframe;	/* inline storage used when nframes == 1 */
	struct can_frame last_sframe;	/* inline storage used when nframes == 1 */
	struct sock *sk;		/* owning BCM socket */
	struct net_device *rx_reg_dev;	/* device passed to can_rx_register() */
};
104
/* /proc directory for the BCM entries (one file per socket) */
static struct proc_dir_entry *proc_dir;

/* BCM socket private data; struct sock must stay the first member (cast) */
struct bcm_sock {
	struct sock sk;
	int bound;			/* nonzero once the socket was bound */
	int ifindex;			/* default interface for new bcm_ops */
	struct notifier_block notifier;	/* netdevice event notifier */
	struct list_head rx_ops;	/* rx operations of this socket */
	struct list_head tx_ops;	/* tx operations of this socket */
	unsigned long dropped_usr_msgs;	/* msgs lost on rcv queue overflow */
	struct proc_dir_entry *bcm_proc_read; /* this socket's procfs entry */
	char procname [9]; /* pointer printed in ASCII with \0 */
};
118
/* cast back from the embedded struct sock (first member of bcm_sock) */
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
	return (struct bcm_sock *)sk;
}
123
/* shorthand sizes used throughout for allocations and copy lengths */
#define CFSIZ sizeof(struct can_frame)
#define OPSIZ sizeof(struct bcm_op)
#define MHSIZ sizeof(struct bcm_msg_head)
127
/*
 * rounded_tv2jif - calculate jiffies from timeval including optional up
 * @tv: pointer to timeval
 *
 * Description:
 * Unlike timeval_to_jiffies() provided in include/linux/jiffies.h, this
 * function is intentionally more relaxed on precise timer ticks to get
 * exact one jiffy for requested 1000us on a 1000HZ machine.
 * This code is to be removed when upgrading to kernel hrtimer.
 *
 * Return:
 * calculated jiffies (max: ULONG_MAX)
 */
static unsigned long rounded_tv2jif(const struct timeval *tv)
{
	unsigned long sec = tv->tv_sec;
	unsigned long usec = tv->tv_usec;
	unsigned long jif;

	/* would sec * HZ alone already overflow? => saturate */
	if (sec > ULONG_MAX / HZ)
		return ULONG_MAX;

	/* round up to get at least the requested time */
	usec += 1000000 / HZ - 1;

	jif = usec / (1000000 / HZ);

	/* saturate instead of wrapping when adding the seconds part */
	if (sec * HZ > ULONG_MAX - jif)
		return ULONG_MAX;

	return jif + sec * HZ;
}
160
/*
 * procfs functions
 */

/* resolve an ifindex to a printable interface name for procfs output */
static char *bcm_proc_getifname(int ifindex)
{
	struct net_device *dev;

	if (!ifindex)
		return "any";

	/* no usage counting */
	/*
	 * NOTE(review): dev->name is returned without holding a reference,
	 * so the device may disappear while the caller still uses the
	 * string - confirm this is acceptable under the callers' locking.
	 */
	dev = __dev_get_by_index(&init_net, ifindex);
	if (dev)
		return dev->name;

	return "???";
}
178
/*
 * bcm_read_proc - procfs read handler for one BCM socket
 *
 * Dumps the socket state and all attached rx/tx operations into the
 * single provided @page. Output that would exceed the page is cut off
 * and marked with "(..)".
 */
static int bcm_read_proc(char *page, char **start, off_t off,
			 int count, int *eof, void *data)
{
	int len = 0;
	struct sock *sk = (struct sock *)data;
	struct bcm_sock *bo = bcm_sk(sk);
	struct bcm_op *op;

	/* socket header line */
	len += snprintf(page + len, PAGE_SIZE - len, ">>> socket %p",
			sk->sk_socket);
	len += snprintf(page + len, PAGE_SIZE - len, " / sk %p", sk);
	len += snprintf(page + len, PAGE_SIZE - len, " / bo %p", bo);
	len += snprintf(page + len, PAGE_SIZE - len, " / dropped %lu",
			bo->dropped_usr_msgs);
	len += snprintf(page + len, PAGE_SIZE - len, " / bound %s",
			bcm_proc_getifname(bo->ifindex));
	len += snprintf(page + len, PAGE_SIZE - len, " <<<\n");

	list_for_each_entry(op, &bo->rx_ops, list) {

		unsigned long reduction;

		/* print only active entries & prevent division by zero */
		if (!op->frames_abs)
			continue;

		len += snprintf(page + len, PAGE_SIZE - len,
				"rx_op: %03X %-5s ",
				op->can_id, bcm_proc_getifname(op->ifindex));
		len += snprintf(page + len, PAGE_SIZE - len, "[%d]%c ",
				op->nframes,
				(op->flags & RX_CHECK_DLC)?'d':' ');
		if (op->j_ival1)
			len += snprintf(page + len, PAGE_SIZE - len,
					"timeo=%ld ", op->j_ival1);

		if (op->j_ival2)
			len += snprintf(page + len, PAGE_SIZE - len,
					"thr=%ld ", op->j_ival2);

		len += snprintf(page + len, PAGE_SIZE - len,
				"# recv %ld (%ld) => reduction: ",
				op->frames_filtered, op->frames_abs);

		/* percentage of frames filtered out before reaching the user */
		reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;

		len += snprintf(page + len, PAGE_SIZE - len, "%s%ld%%\n",
				(reduction == 100)?"near ":"", reduction);

		if (len > PAGE_SIZE - 200) {
			/* mark output cut off */
			len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
			break;
		}
	}

	list_for_each_entry(op, &bo->tx_ops, list) {

		len += snprintf(page + len, PAGE_SIZE - len,
				"tx_op: %03X %s [%d] ",
				op->can_id, bcm_proc_getifname(op->ifindex),
				op->nframes);
		if (op->j_ival1)
			len += snprintf(page + len, PAGE_SIZE - len, "t1=%ld ",
					op->j_ival1);

		if (op->j_ival2)
			len += snprintf(page + len, PAGE_SIZE - len, "t2=%ld ",
					op->j_ival2);

		len += snprintf(page + len, PAGE_SIZE - len, "# sent %ld\n",
				op->frames_abs);

		if (len > PAGE_SIZE - 100) {
			/* mark output cut off */
			len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
			break;
		}
	}

	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	/* single-shot read: everything fits into one page */
	*eof = 1;
	return len;
}
264
/*
 * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
 * of the given bcm tx op
 */
static void bcm_can_tx(struct bcm_op *op)
{
	struct sk_buff *skb;
	struct net_device *dev;
	struct can_frame *cf = &op->frames[op->currframe];

	/* no target device? => exit */
	if (!op->ifindex)
		return;

	dev = dev_get_by_index(&init_net, op->ifindex);
	if (!dev) {
		/* RFC: should this bcm_op remove itself here? */
		return;
	}

	/* gfp_any(): callable from process and (timer) softirq context */
	skb = alloc_skb(CFSIZ, gfp_any());
	if (!skb)
		goto out;

	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);

	/* send with loopback */
	skb->dev = dev;
	skb->sk = op->sk;
	can_send(skb, 1);

	/* update statistics */
	op->currframe++;
	op->frames_abs++;

	/* reached last frame? => restart multiplex cycle at index 0 */
	if (op->currframe >= op->nframes)
		op->currframe = 0;
 out:
	dev_put(dev);
}
306
/*
 * bcm_send_to_user - send a BCM message to the userspace
 * (consisting of bcm_msg_head + x CAN frames)
 *
 * Queues the composed datagram on the socket receive queue so that
 * bcm_recvmsg() can deliver it. A message dropped because the queue
 * is full is only counted in bo->dropped_usr_msgs.
 */
static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
			     struct can_frame *frames, int has_timestamp)
{
	struct sk_buff *skb;
	struct can_frame *firstframe;
	struct sockaddr_can *addr;
	struct sock *sk = op->sk;
	int datalen = head->nframes * CFSIZ;
	int err;

	skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
	if (!skb)
		return;

	memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head));

	if (head->nframes) {
		/* can_frames starting here */
		firstframe = (struct can_frame *) skb_tail_pointer(skb);

		memcpy(skb_put(skb, datalen), frames, datalen);

		/*
		 * the BCM uses the can_dlc-element of the can_frame
		 * structure for internal purposes. This is only
		 * relevant for updates that are generated by the
		 * BCM, where nframes is 1
		 */
		if (head->nframes == 1)
			firstframe->can_dlc &= BCM_CAN_DLC_MASK;
	}

	if (has_timestamp) {
		/* restore rx timestamp */
		skb->tstamp = op->rx_stamp;
	}

	/*
	 * Put the datagram to the queue so that bcm_recvmsg() can
	 * get it from there. We need to pass the interface index to
	 * bcm_recvmsg(). We pass a whole struct sockaddr_can in skb->cb
	 * containing the interface index.
	 */

	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
	addr = (struct sockaddr_can *)skb->cb;
	memset(addr, 0, sizeof(*addr));
	addr->can_family = AF_CAN;
	addr->can_ifindex = op->rx_ifindex;

	err = sock_queue_rcv_skb(sk, skb);
	if (err < 0) {
		struct bcm_sock *bo = bcm_sk(sk);

		kfree_skb(skb);
		/* don't care about overflows in this statistic */
		bo->dropped_usr_msgs++;
	}
}
370
/*
 * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions
 *
 * Timer callback: while op->count is positive, frames are sent with
 * interval j_ival1 (optionally raising a TX_EXPIRED notification when
 * the count hits zero); afterwards transmission continues with the
 * j_ival2 interval, if one is configured.
 */
static void bcm_tx_timeout_handler(unsigned long data)
{
	struct bcm_op *op = (struct bcm_op *)data;

	if (op->j_ival1 && (op->count > 0)) {

		op->count--;
		if (!op->count && (op->flags & TX_COUNTEVT)) {
			struct bcm_msg_head msg_head;

			/* create notification to user */
			msg_head.opcode = TX_EXPIRED;
			msg_head.flags = op->flags;
			msg_head.count = op->count;
			msg_head.ival1 = op->ival1;
			msg_head.ival2 = op->ival2;
			msg_head.can_id = op->can_id;
			msg_head.nframes = 0;

			bcm_send_to_user(op, &msg_head, NULL, 0);
		}
	}

	/* re-test: op->count may just have been decremented to zero above */
	if (op->j_ival1 && (op->count > 0)) {

		/* send (next) frame */
		bcm_can_tx(op);
		mod_timer(&op->timer, jiffies + op->j_ival1);

	} else {
		if (op->j_ival2) {

			/* send (next) frame */
			bcm_can_tx(op);
			mod_timer(&op->timer, jiffies + op->j_ival2);
		}
	}

	return;
}
414
/*
 * bcm_rx_changed - create a RX_CHANGED notification due to changed content
 */
static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
{
	struct bcm_msg_head head;

	/* remember when the last notification went out (throttle base) */
	op->j_lastmsg = jiffies;

	/* update statistics */
	op->frames_filtered++;

	/* prevent statistics overflow */
	if (op->frames_filtered > ULONG_MAX/100)
		op->frames_filtered = op->frames_abs = 0;

	head.opcode = RX_CHANGED;
	head.flags = op->flags;
	head.count = op->count;
	head.ival1 = op->ival1;
	head.ival2 = op->ival2;
	head.can_id = op->can_id;
	head.nframes = 1;

	bcm_send_to_user(op, &head, data, 1);
}
441
/*
 * bcm_rx_update_and_send - process a detected relevant receive content change
 * 1. update the last received data
 * 2. send a notification to the user (if possible)
 *
 * When a throttle interval (j_ival2) is configured and has not yet
 * elapsed since the last notification, the frame is only marked RX_THR
 * and delivered later by bcm_rx_thr_handler().
 */
static void bcm_rx_update_and_send(struct bcm_op *op,
				   struct can_frame *lastdata,
				   struct can_frame *rxdata)
{
	unsigned long nexttx = op->j_lastmsg + op->j_ival2;

	memcpy(lastdata, rxdata, CFSIZ);

	/* mark as used */
	lastdata->can_dlc |= RX_RECV;

	/* throttle bcm_rx_changed ? */
	if ((op->thrtimer.expires) ||
	    ((op->j_ival2) && (nexttx > jiffies))) {
		/* we are already waiting OR we have to start waiting */

		/* mark as 'throttled' */
		lastdata->can_dlc |= RX_THR;

		if (!(op->thrtimer.expires)) {
			/* start the timer only the first time */
			mod_timer(&op->thrtimer, nexttx);
		}

	} else {
		/* send RX_CHANGED to the user immediately */
		bcm_rx_changed(op, rxdata);
	}
}
476
/*
 * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly
 * received data stored in op->last_frames[]
 *
 * Only bits set in the relevant filter mask (op->frames[index]) are
 * compared; any relevant change triggers bcm_rx_update_and_send().
 */
static void bcm_rx_cmp_to_index(struct bcm_op *op, int index,
				struct can_frame *rxdata)
{
	/*
	 * no one uses the MSBs of can_dlc for comparation,
	 * so we use it here to detect the first time of reception
	 */

	if (!(op->last_frames[index].can_dlc & RX_RECV)) {
		/* received data for the first time => send update to user */
		bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
		return;
	}

	/* do a real check in can_frame data section */

	if ((GET_U64(&op->frames[index]) & GET_U64(rxdata)) !=
	    (GET_U64(&op->frames[index]) & GET_U64(&op->last_frames[index]))) {
		bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
		return;
	}

	if (op->flags & RX_CHECK_DLC) {
		/* do a real check in can_frame dlc */
		if (rxdata->can_dlc != (op->last_frames[index].can_dlc &
					BCM_CAN_DLC_MASK)) {
			bcm_rx_update_and_send(op, &op->last_frames[index],
					       rxdata);
			return;
		}
	}
}
513
514/*
515 * bcm_rx_starttimer - enable timeout monitoring for CAN frame receiption
516 */
517static void bcm_rx_starttimer(struct bcm_op *op)
518{
519 if (op->flags & RX_NO_AUTOTIMER)
520 return;
521
522 if (op->j_ival1)
523 mod_timer(&op->timer, jiffies + op->j_ival1);
524}
525
/*
 * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out
 *
 * Timer callback: raises a RX_TIMEOUT notification towards the user.
 * The timer is deliberately not restarted here; it is re-armed by
 * bcm_rx_starttimer() on the next matching reception.
 */
static void bcm_rx_timeout_handler(unsigned long data)
{
	struct bcm_op *op = (struct bcm_op *)data;
	struct bcm_msg_head msg_head;

	msg_head.opcode = RX_TIMEOUT;
	msg_head.flags = op->flags;
	msg_head.count = op->count;
	msg_head.ival1 = op->ival1;
	msg_head.ival2 = op->ival2;
	msg_head.can_id = op->can_id;
	msg_head.nframes = 0;

	bcm_send_to_user(op, &msg_head, NULL, 0);

	/* no restart of the timer is done here! */

	/* if user wants to be informed, when cyclic CAN-Messages come back */
	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
		/* clear received can_frames to indicate 'nothing received' */
		memset(op->last_frames, 0, op->nframes * CFSIZ);
	}
}
552
553/*
554 * bcm_rx_thr_handler - the time for blocked content updates is over now:
555 * Check for throttled data and send it to the userspace
556 */
557static void bcm_rx_thr_handler(unsigned long data)
558{
559 struct bcm_op *op = (struct bcm_op *)data;
560 int i = 0;
561
562 /* mark disabled / consumed timer */
563 op->thrtimer.expires = 0;
564
565 if (op->nframes > 1) {
566 /* for MUX filter we start at index 1 */
567 for (i = 1; i < op->nframes; i++) {
568 if ((op->last_frames) &&
569 (op->last_frames[i].can_dlc & RX_THR)) {
570 op->last_frames[i].can_dlc &= ~RX_THR;
571 bcm_rx_changed(op, &op->last_frames[i]);
572 }
573 }
574
575 } else {
576 /* for RX_FILTER_ID and simple filter */
577 if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) {
578 op->last_frames[0].can_dlc &= ~RX_THR;
579 bcm_rx_changed(op, &op->last_frames[0]);
580 }
581 }
582}
583
/*
 * bcm_rx_handler - handle a CAN frame receiption
 *
 * Callback registered via can_rx_register(); invoked for every CAN
 * frame matching the registered can_id/mask. Dispatches to RTR reply,
 * simple filter or multiplex filter processing and re-arms the rx
 * timeout timer.
 */
static void bcm_rx_handler(struct sk_buff *skb, void *data)
{
	struct bcm_op *op = (struct bcm_op *)data;
	struct can_frame rxframe;
	int i;

	/* disable timeout */
	del_timer(&op->timer);

	if (skb->len == sizeof(rxframe)) {
		memcpy(&rxframe, skb->data, sizeof(rxframe));
		/* save rx timestamp */
		op->rx_stamp = skb->tstamp;
		/* save originator for recvfrom() */
		op->rx_ifindex = skb->dev->ifindex;
		/* update statistics */
		op->frames_abs++;
		kfree_skb(skb);

	} else {
		/* unexpected skb length => silently drop */
		kfree_skb(skb);
		return;
	}

	/* the registered mask may match more than this exact can_id */
	if (op->can_id != rxframe.can_id)
		return;

	if (op->flags & RX_RTR_FRAME) {
		/* send reply for RTR-request (placed in op->frames[0]) */
		bcm_can_tx(op);
		return;
	}

	if (op->flags & RX_FILTER_ID) {
		/* the easiest case */
		bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe);
		bcm_rx_starttimer(op);
		return;
	}

	if (op->nframes == 1) {
		/* simple compare with index 0 */
		bcm_rx_cmp_to_index(op, 0, &rxframe);
		bcm_rx_starttimer(op);
		return;
	}

	if (op->nframes > 1) {
		/*
		 * multiplex compare
		 *
		 * find the first multiplex mask that fits.
		 * Remark: The MUX-mask is stored in index 0
		 */

		for (i = 1; i < op->nframes; i++) {
			if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) ==
			    (GET_U64(&op->frames[0]) &
			     GET_U64(&op->frames[i]))) {
				bcm_rx_cmp_to_index(op, i, &rxframe);
				break;
			}
		}
		bcm_rx_starttimer(op);
	}
}
653
654/*
655 * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements
656 */
657static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
658 int ifindex)
659{
660 struct bcm_op *op;
661
662 list_for_each_entry(op, ops, list) {
663 if ((op->can_id == can_id) && (op->ifindex == ifindex))
664 return op;
665 }
666
667 return NULL;
668}
669
670static void bcm_remove_op(struct bcm_op *op)
671{
672 del_timer(&op->timer);
673 del_timer(&op->thrtimer);
674
675 if ((op->frames) && (op->frames != &op->sframe))
676 kfree(op->frames);
677
678 if ((op->last_frames) && (op->last_frames != &op->last_sframe))
679 kfree(op->last_frames);
680
681 kfree(op);
682
683 return;
684}
685
/* drop the can_rx_register() subscription of @op for the given device */
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
	if (op->rx_reg_dev == dev) {
		can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
				  bcm_rx_handler, op);

		/* mark as removed subscription */
		op->rx_reg_dev = NULL;
	} else
		printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device "
		       "mismatch %p %p\n", op->rx_reg_dev, dev);
}
698
/*
 * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops)
 */
static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
{
	struct bcm_op *op, *n;

	list_for_each_entry_safe(op, n, ops, list) {
		if ((op->can_id == can_id) && (op->ifindex == ifindex)) {

			/*
			 * Don't care if we're bound or not (due to netdev
			 * problems) can_rx_unregister() is always a safe
			 * thing to do here.
			 */
			if (op->ifindex) {
				/*
				 * Only remove subscriptions that had not
				 * been removed due to NETDEV_UNREGISTER
				 * in bcm_notifier()
				 */
				if (op->rx_reg_dev) {
					struct net_device *dev;

					dev = dev_get_by_index(&init_net,
							       op->ifindex);
					if (dev) {
						bcm_rx_unreg(dev, op);
						dev_put(dev);
					}
				}
			} else
				can_rx_unregister(NULL, op->can_id,
						  REGMASK(op->can_id),
						  bcm_rx_handler, op);

			list_del(&op->list);
			bcm_remove_op(op);
			return 1; /* done */
		}
	}

	return 0; /* not found */
}
743
744/*
745 * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops)
746 */
747static int bcm_delete_tx_op(struct list_head *ops, canid_t can_id, int ifindex)
748{
749 struct bcm_op *op, *n;
750
751 list_for_each_entry_safe(op, n, ops, list) {
752 if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
753 list_del(&op->list);
754 bcm_remove_op(op);
755 return 1; /* done */
756 }
757 }
758
759 return 0; /* not found */
760}
761
/*
 * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg)
 *
 * Replies with the op's current settings and frame contents via
 * bcm_send_to_user(). Returns MHSIZ, or -EINVAL if no op matches.
 */
static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head,
		       int ifindex)
{
	struct bcm_op *op = bcm_find_op(ops, msg_head->can_id, ifindex);

	if (!op)
		return -EINVAL;

	/* put current values into msg_head */
	msg_head->flags = op->flags;
	msg_head->count = op->count;
	msg_head->ival1 = op->ival1;
	msg_head->ival2 = op->ival2;
	msg_head->nframes = op->nframes;

	bcm_send_to_user(op, msg_head, op->frames, 0);

	return MHSIZ;
}
784
785/*
786 * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg)
787 */
788static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
789 int ifindex, struct sock *sk)
790{
791 struct bcm_sock *bo = bcm_sk(sk);
792 struct bcm_op *op;
793 int i, err;
794
795 /* we need a real device to send frames */
796 if (!ifindex)
797 return -ENODEV;
798
799 /* we need at least one can_frame */
800 if (msg_head->nframes < 1)
801 return -EINVAL;
802
803 /* check the given can_id */
804 op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex);
805
806 if (op) {
807 /* update existing BCM operation */
808
809 /*
810 * Do we need more space for the can_frames than currently
811 * allocated? -> This is a _really_ unusual use-case and
812 * therefore (complexity / locking) it is not supported.
813 */
814 if (msg_head->nframes > op->nframes)
815 return -E2BIG;
816
817 /* update can_frames content */
818 for (i = 0; i < msg_head->nframes; i++) {
819 err = memcpy_fromiovec((u8 *)&op->frames[i],
820 msg->msg_iov, CFSIZ);
821 if (err < 0)
822 return err;
823
824 if (msg_head->flags & TX_CP_CAN_ID) {
825 /* copy can_id into frame */
826 op->frames[i].can_id = msg_head->can_id;
827 }
828 }
829
830 } else {
831 /* insert new BCM operation for the given can_id */
832
833 op = kzalloc(OPSIZ, GFP_KERNEL);
834 if (!op)
835 return -ENOMEM;
836
837 op->can_id = msg_head->can_id;
838
839 /* create array for can_frames and copy the data */
840 if (msg_head->nframes > 1) {
841 op->frames = kmalloc(msg_head->nframes * CFSIZ,
842 GFP_KERNEL);
843 if (!op->frames) {
844 kfree(op);
845 return -ENOMEM;
846 }
847 } else
848 op->frames = &op->sframe;
849
850 for (i = 0; i < msg_head->nframes; i++) {
851 err = memcpy_fromiovec((u8 *)&op->frames[i],
852 msg->msg_iov, CFSIZ);
853 if (err < 0) {
854 if (op->frames != &op->sframe)
855 kfree(op->frames);
856 kfree(op);
857 return err;
858 }
859
860 if (msg_head->flags & TX_CP_CAN_ID) {
861 /* copy can_id into frame */
862 op->frames[i].can_id = msg_head->can_id;
863 }
864 }
865
866 /* tx_ops never compare with previous received messages */
867 op->last_frames = NULL;
868
869 /* bcm_can_tx / bcm_tx_timeout_handler needs this */
870 op->sk = sk;
871 op->ifindex = ifindex;
872
873 /* initialize uninitialized (kzalloc) structure */
874 setup_timer(&op->timer, bcm_tx_timeout_handler,
875 (unsigned long)op);
876
877 /* currently unused in tx_ops */
878 init_timer(&op->thrtimer);
879
880 /* add this bcm_op to the list of the tx_ops */
881 list_add(&op->list, &bo->tx_ops);
882
883 } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
884
885 if (op->nframes != msg_head->nframes) {
886 op->nframes = msg_head->nframes;
887 /* start multiple frame transmission with index 0 */
888 op->currframe = 0;
889 }
890
891 /* check flags */
892
893 op->flags = msg_head->flags;
894
895 if (op->flags & TX_RESET_MULTI_IDX) {
896 /* start multiple frame transmission with index 0 */
897 op->currframe = 0;
898 }
899
900 if (op->flags & SETTIMER) {
901 /* set timer values */
902 op->count = msg_head->count;
903 op->ival1 = msg_head->ival1;
904 op->ival2 = msg_head->ival2;
905 op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
906 op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
907
908 /* disable an active timer due to zero values? */
909 if (!op->j_ival1 && !op->j_ival2)
910 del_timer(&op->timer);
911 }
912
913 if ((op->flags & STARTTIMER) &&
914 ((op->j_ival1 && op->count) || op->j_ival2)) {
915
916 /* spec: send can_frame when starting timer */
917 op->flags |= TX_ANNOUNCE;
918
919 if (op->j_ival1 && (op->count > 0)) {
920 /* op->count-- is done in bcm_tx_timeout_handler */
921 mod_timer(&op->timer, jiffies + op->j_ival1);
922 } else
923 mod_timer(&op->timer, jiffies + op->j_ival2);
924 }
925
926 if (op->flags & TX_ANNOUNCE)
927 bcm_can_tx(op);
928
929 return msg_head->nframes * CFSIZ + MHSIZ;
930}
931
932/*
933 * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg)
934 */
935static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
936 int ifindex, struct sock *sk)
937{
938 struct bcm_sock *bo = bcm_sk(sk);
939 struct bcm_op *op;
940 int do_rx_register;
941 int err = 0;
942
943 if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) {
944 /* be robust against wrong usage ... */
945 msg_head->flags |= RX_FILTER_ID;
946 /* ignore trailing garbage */
947 msg_head->nframes = 0;
948 }
949
950 if ((msg_head->flags & RX_RTR_FRAME) &&
951 ((msg_head->nframes != 1) ||
952 (!(msg_head->can_id & CAN_RTR_FLAG))))
953 return -EINVAL;
954
955 /* check the given can_id */
956 op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex);
957 if (op) {
958 /* update existing BCM operation */
959
960 /*
961 * Do we need more space for the can_frames than currently
962 * allocated? -> This is a _really_ unusual use-case and
963 * therefore (complexity / locking) it is not supported.
964 */
965 if (msg_head->nframes > op->nframes)
966 return -E2BIG;
967
968 if (msg_head->nframes) {
969 /* update can_frames content */
970 err = memcpy_fromiovec((u8 *)op->frames,
971 msg->msg_iov,
972 msg_head->nframes * CFSIZ);
973 if (err < 0)
974 return err;
975
976 /* clear last_frames to indicate 'nothing received' */
977 memset(op->last_frames, 0, msg_head->nframes * CFSIZ);
978 }
979
980 op->nframes = msg_head->nframes;
981
982 /* Only an update -> do not call can_rx_register() */
983 do_rx_register = 0;
984
985 } else {
986 /* insert new BCM operation for the given can_id */
987 op = kzalloc(OPSIZ, GFP_KERNEL);
988 if (!op)
989 return -ENOMEM;
990
991 op->can_id = msg_head->can_id;
992 op->nframes = msg_head->nframes;
993
994 if (msg_head->nframes > 1) {
995 /* create array for can_frames and copy the data */
996 op->frames = kmalloc(msg_head->nframes * CFSIZ,
997 GFP_KERNEL);
998 if (!op->frames) {
999 kfree(op);
1000 return -ENOMEM;
1001 }
1002
1003 /* create and init array for received can_frames */
1004 op->last_frames = kzalloc(msg_head->nframes * CFSIZ,
1005 GFP_KERNEL);
1006 if (!op->last_frames) {
1007 kfree(op->frames);
1008 kfree(op);
1009 return -ENOMEM;
1010 }
1011
1012 } else {
1013 op->frames = &op->sframe;
1014 op->last_frames = &op->last_sframe;
1015 }
1016
1017 if (msg_head->nframes) {
1018 err = memcpy_fromiovec((u8 *)op->frames, msg->msg_iov,
1019 msg_head->nframes * CFSIZ);
1020 if (err < 0) {
1021 if (op->frames != &op->sframe)
1022 kfree(op->frames);
1023 if (op->last_frames != &op->last_sframe)
1024 kfree(op->last_frames);
1025 kfree(op);
1026 return err;
1027 }
1028 }
1029
1030 /* bcm_can_tx / bcm_tx_timeout_handler needs this */
1031 op->sk = sk;
1032 op->ifindex = ifindex;
1033
1034 /* initialize uninitialized (kzalloc) structure */
1035 setup_timer(&op->timer, bcm_rx_timeout_handler,
1036 (unsigned long)op);
1037
1038 /* init throttle timer for RX_CHANGED */
1039 setup_timer(&op->thrtimer, bcm_rx_thr_handler,
1040 (unsigned long)op);
1041
1042 /* mark disabled timer */
1043 op->thrtimer.expires = 0;
1044
1045 /* add this bcm_op to the list of the rx_ops */
1046 list_add(&op->list, &bo->rx_ops);
1047
1048 /* call can_rx_register() */
1049 do_rx_register = 1;
1050
1051 } /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
1052
1053 /* check flags */
1054 op->flags = msg_head->flags;
1055
1056 if (op->flags & RX_RTR_FRAME) {
1057
1058 /* no timers in RTR-mode */
1059 del_timer(&op->thrtimer);
1060 del_timer(&op->timer);
1061
1062 /*
1063 * funny feature in RX(!)_SETUP only for RTR-mode:
1064 * copy can_id into frame BUT without RTR-flag to
1065 * prevent a full-load-loopback-test ... ;-]
1066 */
1067 if ((op->flags & TX_CP_CAN_ID) ||
1068 (op->frames[0].can_id == op->can_id))
1069 op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG;
1070
1071 } else {
1072 if (op->flags & SETTIMER) {
1073
1074 /* set timer value */
1075 op->ival1 = msg_head->ival1;
1076 op->ival2 = msg_head->ival2;
1077 op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
1078 op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
1079
1080 /* disable an active timer due to zero value? */
1081 if (!op->j_ival1)
1082 del_timer(&op->timer);
1083
1084 /* free currently blocked msgs ? */
1085 if (op->thrtimer.expires) {
1086 /* send blocked msgs hereafter */
1087 mod_timer(&op->thrtimer, jiffies + 2);
1088 }
1089
1090 /*
1091 * if (op->j_ival2) is zero, no (new) throttling
1092 * will happen. For details see functions
1093 * bcm_rx_update_and_send() and bcm_rx_thr_handler()
1094 */
1095 }
1096
1097 if ((op->flags & STARTTIMER) && op->j_ival1)
1098 mod_timer(&op->timer, jiffies + op->j_ival1);
1099 }
1100
1101 /* now we can register for can_ids, if we added a new bcm_op */
1102 if (do_rx_register) {
1103 if (ifindex) {
1104 struct net_device *dev;
1105
1106 dev = dev_get_by_index(&init_net, ifindex);
1107 if (dev) {
1108 err = can_rx_register(dev, op->can_id,
1109 REGMASK(op->can_id),
1110 bcm_rx_handler, op,
1111 "bcm");
1112
1113 op->rx_reg_dev = dev;
1114 dev_put(dev);
1115 }
1116
1117 } else
1118 err = can_rx_register(NULL, op->can_id,
1119 REGMASK(op->can_id),
1120 bcm_rx_handler, op, "bcm");
1121 if (err) {
1122 /* this bcm rx op is broken -> remove it */
1123 list_del(&op->list);
1124 bcm_remove_op(op);
1125 return err;
1126 }
1127 }
1128
1129 return msg_head->nframes * CFSIZ + MHSIZ;
1130}
1131
1132/*
1133 * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg)
1134 */
1135static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
1136{
1137 struct sk_buff *skb;
1138 struct net_device *dev;
1139 int err;
1140
1141 /* we need a real device to send frames */
1142 if (!ifindex)
1143 return -ENODEV;
1144
1145 skb = alloc_skb(CFSIZ, GFP_KERNEL);
1146
1147 if (!skb)
1148 return -ENOMEM;
1149
1150 err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
1151 if (err < 0) {
1152 kfree_skb(skb);
1153 return err;
1154 }
1155
1156 dev = dev_get_by_index(&init_net, ifindex);
1157 if (!dev) {
1158 kfree_skb(skb);
1159 return -ENODEV;
1160 }
1161
1162 skb->dev = dev;
1163 skb->sk = sk;
1164 can_send(skb, 1); /* send with loopback */
1165 dev_put(dev);
1166
1167 return CFSIZ + MHSIZ;
1168}
1169
/*
 * bcm_sendmsg - process BCM commands (opcodes) from the userspace
 *
 * Every message starts with a struct bcm_msg_head (MHSIZ bytes) whose
 * opcode selects the operation; TX_SETUP/RX_SETUP/TX_SEND additionally
 * carry can_frames. Returns the number of bytes consumed or a negative
 * error code.
 */
static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
		       struct msghdr *msg, size_t size)
{
	struct sock *sk = sock->sk;
	struct bcm_sock *bo = bcm_sk(sk);
	int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
	struct bcm_msg_head msg_head;
	int ret; /* read bytes or error codes as return value */

	if (!bo->bound)
		return -ENOTCONN;

	/* check for alternative ifindex for this bcm_op */

	if (!ifindex && msg->msg_name) {
		/* no bound device as default => check msg_name */
		struct sockaddr_can *addr =
			(struct sockaddr_can *)msg->msg_name;

		if (addr->can_family != AF_CAN)
			return -EINVAL;

		/* ifindex from sendto() */
		ifindex = addr->can_ifindex;

		if (ifindex) {
			struct net_device *dev;

			/* only existing CAN netdevices are acceptable */
			dev = dev_get_by_index(&init_net, ifindex);
			if (!dev)
				return -ENODEV;

			if (dev->type != ARPHRD_CAN) {
				dev_put(dev);
				return -ENODEV;
			}

			dev_put(dev);
		}
	}

	/* read message head information */

	ret = memcpy_fromiovec((u8 *)&msg_head, msg->msg_iov, MHSIZ);
	if (ret < 0)
		return ret;

	/* serialize against bcm_notifier() and concurrent senders */
	lock_sock(sk);

	switch (msg_head.opcode) {

	case TX_SETUP:
		ret = bcm_tx_setup(&msg_head, msg, ifindex, sk);
		break;

	case RX_SETUP:
		ret = bcm_rx_setup(&msg_head, msg, ifindex, sk);
		break;

	case TX_DELETE:
		if (bcm_delete_tx_op(&bo->tx_ops, msg_head.can_id, ifindex))
			ret = MHSIZ;
		else
			ret = -EINVAL;
		break;

	case RX_DELETE:
		if (bcm_delete_rx_op(&bo->rx_ops, msg_head.can_id, ifindex))
			ret = MHSIZ;
		else
			ret = -EINVAL;
		break;

	case TX_READ:
		/* reuse msg_head for the reply to TX_READ */
		msg_head.opcode  = TX_STATUS;
		ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex);
		break;

	case RX_READ:
		/* reuse msg_head for the reply to RX_READ */
		msg_head.opcode  = RX_STATUS;
		ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex);
		break;

	case TX_SEND:
		/* we need at least one can_frame */
		if (msg_head.nframes < 1)
			ret = -EINVAL;
		else
			ret = bcm_tx_send(msg, ifindex, sk);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	release_sock(sk);

	return ret;
}
1275
/*
 * notification handler for netdevice status changes
 *
 * Each BCM socket registers one of these (see bcm_init()); @nb is
 * embedded in the socket's struct bcm_sock, which container_of()
 * recovers below.
 */
static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
			void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
	struct sock *sk = &bo->sk;
	struct bcm_op *op;
	int notify_enodev = 0;

	/* only care about devices in the initial network namespace */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	if (dev->type != ARPHRD_CAN)
		return NOTIFY_DONE;

	switch (msg) {

	case NETDEV_UNREGISTER:
		lock_sock(sk);

		/* remove device specific receive entries */
		list_for_each_entry(op, &bo->rx_ops, list)
			if (op->rx_reg_dev == dev)
				bcm_rx_unreg(dev, op);

		/* remove device reference, if this is our bound device */
		if (bo->bound && bo->ifindex == dev->ifindex) {
			bo->bound   = 0;
			bo->ifindex = 0;
			notify_enodev = 1;
		}

		release_sock(sk);

		/* report the lost device to the user after dropping the lock */
		if (notify_enodev) {
			sk->sk_err = ENODEV;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
		break;

	case NETDEV_DOWN:
		if (bo->bound && bo->ifindex == dev->ifindex) {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	}

	return NOTIFY_DONE;
}
1330
/*
 * initial settings for all BCM sockets to be set at socket creation time
 *
 * Hooked in as bcm_proto.init; the matching teardown is bcm_release().
 */
static int bcm_init(struct sock *sk)
{
	struct bcm_sock *bo = bcm_sk(sk);

	bo->bound            = 0;
	bo->ifindex          = 0;
	bo->dropped_usr_msgs = 0;
	bo->bcm_proc_read    = NULL;

	INIT_LIST_HEAD(&bo->tx_ops);
	INIT_LIST_HEAD(&bo->rx_ops);

	/* set notifier */
	bo->notifier.notifier_call = bcm_notifier;

	register_netdevice_notifier(&bo->notifier);

	return 0;
}
1353
/*
 * standard socket functions
 */
static int bcm_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct bcm_sock *bo = bcm_sk(sk);
	struct bcm_op *op, *next;

	/* remove bcm_ops, timer, rx_unregister(), etc. */

	/* stop netdevice notifications before tearing down the op lists */
	unregister_netdevice_notifier(&bo->notifier);

	lock_sock(sk);

	list_for_each_entry_safe(op, next, &bo->tx_ops, list)
		bcm_remove_op(op);

	list_for_each_entry_safe(op, next, &bo->rx_ops, list) {
		/*
		 * Don't care if we're bound or not (due to netdev problems)
		 * can_rx_unregister() is always a safe thing to do here.
		 */
		if (op->ifindex) {
			/*
			 * Only remove subscriptions that had not
			 * been removed due to NETDEV_UNREGISTER
			 * in bcm_notifier()
			 */
			if (op->rx_reg_dev) {
				struct net_device *dev;

				dev = dev_get_by_index(&init_net, op->ifindex);
				if (dev) {
					bcm_rx_unreg(dev, op);
					dev_put(dev);
				}
			}
		} else
			can_rx_unregister(NULL, op->can_id,
					  REGMASK(op->can_id),
					  bcm_rx_handler, op);

		bcm_remove_op(op);
	}

	/* remove procfs entry */
	if (proc_dir && bo->bcm_proc_read)
		remove_proc_entry(bo->procname, proc_dir);

	/* remove device reference */
	if (bo->bound) {
		bo->bound   = 0;
		bo->ifindex = 0;
	}

	release_sock(sk);
	sock_put(sk);

	return 0;
}
1415
1416static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
1417 int flags)
1418{
1419 struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
1420 struct sock *sk = sock->sk;
1421 struct bcm_sock *bo = bcm_sk(sk);
1422
1423 if (bo->bound)
1424 return -EISCONN;
1425
1426 /* bind a device to this socket */
1427 if (addr->can_ifindex) {
1428 struct net_device *dev;
1429
1430 dev = dev_get_by_index(&init_net, addr->can_ifindex);
1431 if (!dev)
1432 return -ENODEV;
1433
1434 if (dev->type != ARPHRD_CAN) {
1435 dev_put(dev);
1436 return -ENODEV;
1437 }
1438
1439 bo->ifindex = dev->ifindex;
1440 dev_put(dev);
1441
1442 } else {
1443 /* no interface reference for ifindex = 0 ('any' CAN device) */
1444 bo->ifindex = 0;
1445 }
1446
1447 bo->bound = 1;
1448
1449 if (proc_dir) {
1450 /* unique socket address as filename */
1451 sprintf(bo->procname, "%p", sock);
1452 bo->bcm_proc_read = create_proc_read_entry(bo->procname, 0644,
1453 proc_dir,
1454 bcm_read_proc, sk);
1455 }
1456
1457 return 0;
1458}
1459
1460static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
1461 struct msghdr *msg, size_t size, int flags)
1462{
1463 struct sock *sk = sock->sk;
1464 struct sk_buff *skb;
1465 int error = 0;
1466 int noblock;
1467 int err;
1468
1469 noblock = flags & MSG_DONTWAIT;
1470 flags &= ~MSG_DONTWAIT;
1471 skb = skb_recv_datagram(sk, flags, noblock, &error);
1472 if (!skb)
1473 return error;
1474
1475 if (skb->len < size)
1476 size = skb->len;
1477
1478 err = memcpy_toiovec(msg->msg_iov, skb->data, size);
1479 if (err < 0) {
1480 skb_free_datagram(sk, skb);
1481 return err;
1482 }
1483
1484 sock_recv_timestamp(msg, sk, skb);
1485
1486 if (msg->msg_name) {
1487 msg->msg_namelen = sizeof(struct sockaddr_can);
1488 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1489 }
1490
1491 skb_free_datagram(sk, skb);
1492
1493 return size;
1494}
1495
/* socket interface operations for CAN_BCM sockets */
static struct proto_ops bcm_ops __read_mostly = {
	.family        = PF_CAN,
	.release       = bcm_release,
	.bind          = sock_no_bind,
	.connect       = bcm_connect,
	.socketpair    = sock_no_socketpair,
	.accept        = sock_no_accept,
	.getname       = sock_no_getname,
	.poll          = datagram_poll,
	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
	.listen        = sock_no_listen,
	.shutdown      = sock_no_shutdown,
	.setsockopt    = sock_no_setsockopt,
	.getsockopt    = sock_no_getsockopt,
	.sendmsg       = bcm_sendmsg,
	.recvmsg       = bcm_recvmsg,
	.mmap          = sock_no_mmap,
	.sendpage      = sock_no_sendpage,
};
1515
/* protocol description; .init runs for every newly created BCM socket */
static struct proto bcm_proto __read_mostly = {
	.name       = "CAN_BCM",
	.owner      = THIS_MODULE,
	.obj_size   = sizeof(struct bcm_sock),
	.init       = bcm_init,
};
1522
/* registration record handed to the PF_CAN core (can_proto_register) */
static struct can_proto bcm_can_proto __read_mostly = {
	.type       = SOCK_DGRAM,
	.protocol   = CAN_BCM,
	.capability = -1,	/* no capability required to open the socket */
	.ops        = &bcm_ops,
	.prot       = &bcm_proto,
};
1530
/* module entry point: register the protocol and create the procfs dir */
static int __init bcm_module_init(void)
{
	int err;

	printk(banner);

	err = can_proto_register(&bcm_can_proto);
	if (err < 0) {
		printk(KERN_ERR "can: registration of bcm protocol failed\n");
		return err;
	}

	/* create /proc/net/can-bcm directory */
	proc_dir = proc_mkdir("can-bcm", init_net.proc_net);

	/* proc_dir stays NULL without procfs; the users check for that */
	if (proc_dir)
		proc_dir->owner = THIS_MODULE;

	return 0;
}
1551
/* module exit point: undo bcm_module_init() */
static void __exit bcm_module_exit(void)
{
	can_proto_unregister(&bcm_can_proto);

	if (proc_dir)
		proc_net_remove(&init_net, "can-bcm");
}
1559
1560module_init(bcm_module_init);
1561module_exit(bcm_module_exit);
diff --git a/net/can/proc.c b/net/can/proc.c
new file mode 100644
index 000000000000..520fef5e5398
--- /dev/null
+++ b/net/can/proc.c
@@ -0,0 +1,533 @@
1/*
2 * proc.c - procfs support for Protocol family CAN core module
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/proc_fs.h>
46#include <linux/list.h>
47#include <linux/rcupdate.h>
48#include <linux/can/core.h>
49
50#include "af_can.h"
51
52/*
53 * proc filenames for the PF_CAN core
54 */
55
56#define CAN_PROC_VERSION "version"
57#define CAN_PROC_STATS "stats"
58#define CAN_PROC_RESET_STATS "reset_stats"
59#define CAN_PROC_RCVLIST_ALL "rcvlist_all"
60#define CAN_PROC_RCVLIST_FIL "rcvlist_fil"
61#define CAN_PROC_RCVLIST_INV "rcvlist_inv"
62#define CAN_PROC_RCVLIST_SFF "rcvlist_sff"
63#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
64#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
65
/* /proc/net/can directory and its entries (all NULL without procfs) */
static struct proc_dir_entry *can_dir;
static struct proc_dir_entry *pde_version;
static struct proc_dir_entry *pde_stats;
static struct proc_dir_entry *pde_reset_stats;
static struct proc_dir_entry *pde_rcvlist_all;
static struct proc_dir_entry *pde_rcvlist_fil;
static struct proc_dir_entry *pde_rcvlist_inv;
static struct proc_dir_entry *pde_rcvlist_sff;
static struct proc_dir_entry *pde_rcvlist_eff;
static struct proc_dir_entry *pde_rcvlist_err;

/* set when 'reset_stats' is read; consumed by can_init_stats() */
static int user_reset;

/* printable names for the receive list indices (RX_ERR .. RX_EFF) */
static const char rx_list_name[][8] = {
	[RX_ERR] = "rx_err",
	[RX_ALL] = "rx_all",
	[RX_FIL] = "rx_fil",
	[RX_INV] = "rx_inv",
	[RX_EFF] = "rx_eff",
};
86
87/*
88 * af_can statistics stuff
89 */
90
/* reset all global AF_CAN statistics counters (keeps the reset counts) */
static void can_init_stats(void)
{
	/*
	 * This memset function is called from a timer context (when
	 * can_stattimer is active which is the default) OR in a process
	 * context (reading the proc_fs when can_stattimer is disabled).
	 */
	memset(&can_stats, 0, sizeof(can_stats));
	can_stats.jiffies_init = jiffies;

	can_pstats.stats_reset++;

	/* a pending user-requested reset (via 'reset_stats') is now done */
	if (user_reset) {
		user_reset = 0;
		can_pstats.user_reset++;
	}
}
108
109static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
110 unsigned long count)
111{
112 unsigned long rate;
113
114 if (oldjif == newjif)
115 return 0;
116
117 /* see can_stat_update() - this should NEVER happen! */
118 if (count > (ULONG_MAX / HZ)) {
119 printk(KERN_ERR "can: calc_rate: count exceeded! %ld\n",
120 count);
121 return 99999999;
122 }
123
124 rate = (count * HZ) / (newjif - oldjif);
125
126 return rate;
127}
128
/*
 * can_stat_update - periodic statistics refresh (can_stattimer callback)
 *
 * Recomputes total/current/maximum rx/tx rates and match ratios from the
 * global can_stats counters and re-arms itself for one second later.
 */
void can_stat_update(unsigned long data)
{
	unsigned long j = jiffies; /* snapshot */

	/* restart counting in timer context on user request */
	if (user_reset)
		can_init_stats();

	/* restart counting on jiffies overflow */
	if (j < can_stats.jiffies_init)
		can_init_stats();

	/* prevent overflow in calc_rate() */
	if (can_stats.rx_frames > (ULONG_MAX / HZ))
		can_init_stats();

	/* prevent overflow in calc_rate() */
	if (can_stats.tx_frames > (ULONG_MAX / HZ))
		can_init_stats();

	/* matches overflow - very improbable */
	if (can_stats.matches > (ULONG_MAX / 100))
		can_init_stats();

	/* calc total values */
	if (can_stats.rx_frames)
		can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
						 can_stats.rx_frames;

	can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
					    can_stats.tx_frames);
	can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
					    can_stats.rx_frames);

	/* calc current values */
	if (can_stats.rx_frames_delta)
		can_stats.current_rx_match_ratio =
			(can_stats.matches_delta * 100) /
			can_stats.rx_frames_delta;

	/* per-second deltas: rate over exactly one HZ interval */
	can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
	can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);

	/* check / update maximum values */
	if (can_stats.max_tx_rate < can_stats.current_tx_rate)
		can_stats.max_tx_rate = can_stats.current_tx_rate;

	if (can_stats.max_rx_rate < can_stats.current_rx_rate)
		can_stats.max_rx_rate = can_stats.current_rx_rate;

	if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
		can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;

	/* clear values for 'current rate' calculation */
	can_stats.tx_frames_delta = 0;
	can_stats.rx_frames_delta = 0;
	can_stats.matches_delta   = 0;

	/* restart timer (one second) */
	mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
}
190
191/*
192 * proc read functions
193 *
194 * From known use-cases we expect about 10 entries in a receive list to be
195 * printed in the proc_fs. So PAGE_SIZE is definitely enough space here.
196 *
197 */
198
/*
 * can_print_rcvlist - append one receiver list to a proc page buffer
 *
 * Walks @rx_list under RCU and prints one line per registered receiver.
 * Returns the new total length; output is truncated with "(..)" when
 * the page buffer is nearly full.
 */
static int can_print_rcvlist(char *page, int len, struct hlist_head *rx_list,
			     struct net_device *dev)
{
	struct receiver *r;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(r, n, rx_list, list) {
		/* extended frame ids get the wide (%08X) can_id format */
		char *fmt = (r->can_id & CAN_EFF_FLAG)?
			"   %-5s  %08X  %08x  %08x  %08x  %8ld  %s\n" :
			"   %-5s     %03X    %08x  %08lx  %08lx  %8ld  %s\n";

		len += snprintf(page + len, PAGE_SIZE - len, fmt,
				DNAME(dev), r->can_id, r->mask,
				(unsigned long)r->func, (unsigned long)r->data,
				r->matches, r->ident);

		/* does a typical line fit into the current buffer? */

		/* 100 Bytes before end of buffer */
		if (len > PAGE_SIZE - 100) {
			/* mark output cut off */
			len += snprintf(page + len, PAGE_SIZE - len,
					"   (..)\n");
			break;
		}
	}
	rcu_read_unlock();

	return len;
}
230
/* append the column header matching can_print_rcvlist() output */
static int can_print_recv_banner(char *page, int len)
{
	/*
	 *                  can1.  00000000  00000000  00000000
	 *                 .......          0  tp20
	 */
	len += snprintf(page + len, PAGE_SIZE - len,
			"  device   can_id   can_mask  function"
			"  userdata   matches  ident\n");

	return len;
}
243
/*
 * can_proc_read_stats - render /proc/net/can/stats
 *
 * Prints the raw frame counters always; the derived rates/ratios only
 * when the statistics timer is active (its function pointer still set
 * to can_stat_update).
 */
static int can_proc_read_stats(char *page, char **start, off_t off,
			       int count, int *eof, void *data)
{
	int len = 0;

	len += snprintf(page + len, PAGE_SIZE - len, "\n");
	len += snprintf(page + len, PAGE_SIZE - len,
			" %8ld transmitted frames (TXF)\n",
			can_stats.tx_frames);
	len += snprintf(page + len, PAGE_SIZE - len,
			" %8ld received frames (RXF)\n", can_stats.rx_frames);
	len += snprintf(page + len, PAGE_SIZE - len,
			" %8ld matched frames (RXMF)\n", can_stats.matches);

	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	/* derived statistics are only meaningful while the timer runs */
	if (can_stattimer.function == can_stat_update) {
		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld %% total match ratio (RXMR)\n",
				can_stats.total_rx_match_ratio);

		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s total tx rate (TXR)\n",
				can_stats.total_tx_rate);
		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s total rx rate (RXR)\n",
				can_stats.total_rx_rate);

		len += snprintf(page + len, PAGE_SIZE - len, "\n");

		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld %% current match ratio (CRXMR)\n",
				can_stats.current_rx_match_ratio);

		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s current tx rate (CTXR)\n",
				can_stats.current_tx_rate);
		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s current rx rate (CRXR)\n",
				can_stats.current_rx_rate);

		len += snprintf(page + len, PAGE_SIZE - len, "\n");

		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld %% max match ratio (MRXMR)\n",
				can_stats.max_rx_match_ratio);

		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s max tx rate (MTXR)\n",
				can_stats.max_tx_rate);
		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld frames/s max rx rate (MRXR)\n",
				can_stats.max_rx_rate);

		len += snprintf(page + len, PAGE_SIZE - len, "\n");
	}

	len += snprintf(page + len, PAGE_SIZE - len,
			" %8ld current receive list entries (CRCV)\n",
			can_pstats.rcv_entries);
	len += snprintf(page + len, PAGE_SIZE - len,
			" %8ld maximum receive list entries (MRCV)\n",
			can_pstats.rcv_entries_max);

	if (can_pstats.stats_reset)
		len += snprintf(page + len, PAGE_SIZE - len,
				"\n %8ld statistic resets (STR)\n",
				can_pstats.stats_reset);

	if (can_pstats.user_reset)
		len += snprintf(page + len, PAGE_SIZE - len,
				" %8ld user statistic resets (USTR)\n",
				can_pstats.user_reset);

	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	*eof = 1;
	return len;
}
323
/*
 * can_proc_read_reset_stats - render /proc/net/can/reset_stats
 *
 * Reading this file requests a statistics reset: deferred to the timer
 * when it is running, performed immediately otherwise.
 */
static int can_proc_read_reset_stats(char *page, char **start, off_t off,
				     int count, int *eof, void *data)
{
	int len = 0;

	user_reset = 1;

	if (can_stattimer.function == can_stat_update) {
		/* timer active: can_stat_update() will pick up user_reset */
		len += snprintf(page + len, PAGE_SIZE - len,
				"Scheduled statistic reset #%ld.\n",
				can_pstats.stats_reset + 1);

	} else {
		/* no timer: reset right here (at most once per jiffy) */
		if (can_stats.jiffies_init != jiffies)
			can_init_stats();

		len += snprintf(page + len, PAGE_SIZE - len,
				"Performed statistic reset #%ld.\n",
				can_pstats.stats_reset);
	}

	*eof = 1;
	return len;
}
348
/* render /proc/net/can/version: the PF_CAN core version string */
static int can_proc_read_version(char *page, char **start, off_t off,
				 int count, int *eof, void *data)
{
	int len = 0;

	len += snprintf(page + len, PAGE_SIZE - len, "%s\n",
			CAN_VERSION_STRING);
	*eof = 1;
	return len;
}
359
/*
 * can_proc_read_rcvlist - render one rcvlist_* proc file
 *
 * @data carries the receive list index (RX_ERR .. RX_EFF) selected at
 * proc entry creation time; one section per registered CAN device.
 */
static int can_proc_read_rcvlist(char *page, char **start, off_t off,
				 int count, int *eof, void *data)
{
	/* double cast to prevent GCC warning */
	int idx = (int)(long)data;
	int len = 0;
	struct dev_rcv_lists *d;
	struct hlist_node *n;

	len += snprintf(page + len, PAGE_SIZE - len,
			"\nreceive list '%s':\n", rx_list_name[idx]);

	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {

		if (!hlist_empty(&d->rx[idx])) {
			len = can_print_recv_banner(page, len);
			len = can_print_rcvlist(page, len, &d->rx[idx], d->dev);
		} else
			len += snprintf(page + len, PAGE_SIZE - len,
					"  (%s: no entry)\n", DNAME(d->dev));

		/* exit on end of buffer? */
		if (len > PAGE_SIZE - 100)
			break;
	}
	rcu_read_unlock();

	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	*eof = 1;
	return len;
}
393
/*
 * can_proc_read_rcvlist_sff - render /proc/net/can/rcvlist_sff
 *
 * The SFF receivers are kept in 0x800 per-can_id hash buckets (one per
 * possible standard frame id), so this walks all buckets per device
 * instead of a single list.
 */
static int can_proc_read_rcvlist_sff(char *page, char **start, off_t off,
				     int count, int *eof, void *data)
{
	int len = 0;
	struct dev_rcv_lists *d;
	struct hlist_node *n;

	/* RX_SFF */
	len += snprintf(page + len, PAGE_SIZE - len,
			"\nreceive list 'rx_sff':\n");

	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
		int i, all_empty = 1;
		/* check whether at least one list is non-empty */
		for (i = 0; i < 0x800; i++)
			if (!hlist_empty(&d->rx_sff[i])) {
				all_empty = 0;
				break;
			}

		if (!all_empty) {
			len = can_print_recv_banner(page, len);
			for (i = 0; i < 0x800; i++) {
				if (!hlist_empty(&d->rx_sff[i]) &&
				    len < PAGE_SIZE - 100)
					len = can_print_rcvlist(page, len,
								&d->rx_sff[i],
								d->dev);
			}
		} else
			len += snprintf(page + len, PAGE_SIZE - len,
					"  (%s: no entry)\n", DNAME(d->dev));

		/* exit on end of buffer? */
		if (len > PAGE_SIZE - 100)
			break;
	}
	rcu_read_unlock();

	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	*eof = 1;
	return len;
}
439
440/*
441 * proc utility functions
442 */
443
444static struct proc_dir_entry *can_create_proc_readentry(const char *name,
445 mode_t mode,
446 read_proc_t *read_proc,
447 void *data)
448{
449 if (can_dir)
450 return create_proc_read_entry(name, mode, can_dir, read_proc,
451 data);
452 else
453 return NULL;
454}
455
456static void can_remove_proc_readentry(const char *name)
457{
458 if (can_dir)
459 remove_proc_entry(name, can_dir);
460}
461
/*
 * can_init_proc - create main CAN proc directory and procfs entries
 *
 * Failure to create /proc/net/can is not fatal: can_dir stays NULL and
 * every helper below degrades to a no-op.
 */
void can_init_proc(void)
{
	/* create /proc/net/can directory */
	can_dir = proc_mkdir("can", init_net.proc_net);

	if (!can_dir) {
		printk(KERN_INFO "can: failed to create /proc/net/can . "
		       "CONFIG_PROC_FS missing?\n");
		return;
	}

	can_dir->owner = THIS_MODULE;

	/* own procfs entries from the AF_CAN core */
	pde_version     = can_create_proc_readentry(CAN_PROC_VERSION, 0644,
					can_proc_read_version, NULL);
	pde_stats       = can_create_proc_readentry(CAN_PROC_STATS, 0644,
					can_proc_read_stats, NULL);
	pde_reset_stats = can_create_proc_readentry(CAN_PROC_RESET_STATS, 0644,
					can_proc_read_reset_stats, NULL);
	pde_rcvlist_err = can_create_proc_readentry(CAN_PROC_RCVLIST_ERR, 0644,
					can_proc_read_rcvlist, (void *)RX_ERR);
	pde_rcvlist_all = can_create_proc_readentry(CAN_PROC_RCVLIST_ALL, 0644,
					can_proc_read_rcvlist, (void *)RX_ALL);
	pde_rcvlist_fil = can_create_proc_readentry(CAN_PROC_RCVLIST_FIL, 0644,
					can_proc_read_rcvlist, (void *)RX_FIL);
	pde_rcvlist_inv = can_create_proc_readentry(CAN_PROC_RCVLIST_INV, 0644,
					can_proc_read_rcvlist, (void *)RX_INV);
	pde_rcvlist_eff = can_create_proc_readentry(CAN_PROC_RCVLIST_EFF, 0644,
					can_proc_read_rcvlist, (void *)RX_EFF);
	pde_rcvlist_sff = can_create_proc_readentry(CAN_PROC_RCVLIST_SFF, 0644,
					can_proc_read_rcvlist_sff, NULL);
}
498
/*
 * can_remove_proc - remove procfs entries and main CAN proc directory
 *
 * Each entry is removed only if its creation succeeded in
 * can_init_proc(); finally the directory itself is dropped.
 */
void can_remove_proc(void)
{
	if (pde_version)
		can_remove_proc_readentry(CAN_PROC_VERSION);

	if (pde_stats)
		can_remove_proc_readentry(CAN_PROC_STATS);

	if (pde_reset_stats)
		can_remove_proc_readentry(CAN_PROC_RESET_STATS);

	if (pde_rcvlist_err)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);

	if (pde_rcvlist_all)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);

	if (pde_rcvlist_fil)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);

	if (pde_rcvlist_inv)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);

	if (pde_rcvlist_eff)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);

	if (pde_rcvlist_sff)
		can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);

	if (can_dir)
		proc_net_remove(&init_net, "can");
}
diff --git a/net/can/raw.c b/net/can/raw.c
new file mode 100644
index 000000000000..aeefd1419d00
--- /dev/null
+++ b/net/can/raw.c
@@ -0,0 +1,763 @@
1/*
2 * raw.c - Raw sockets for protocol family CAN
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/init.h>
46#include <linux/uio.h>
47#include <linux/net.h>
48#include <linux/netdevice.h>
49#include <linux/socket.h>
50#include <linux/if_arp.h>
51#include <linux/skbuff.h>
52#include <linux/can.h>
53#include <linux/can/core.h>
54#include <linux/can/raw.h>
55#include <net/sock.h>
56#include <net/net_namespace.h>
57
58#define CAN_RAW_VERSION CAN_VERSION
59static __initdata const char banner[] =
60 KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n";
61
62MODULE_DESCRIPTION("PF_CAN raw protocol");
63MODULE_LICENSE("Dual BSD/GPL");
64MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>");
65
66#define MASK_ALL 0
67
68/*
69 * A raw socket has a list of can_filters attached to it, each receiving
70 * the CAN frames matching that filter. If the filter list is empty,
71 * no CAN frames will be received by the socket. The default after
72 * opening the socket, is to have one filter which receives all frames.
73 * The filter list is allocated dynamically with the exception of the
74 * list containing only one item. This common case is optimized by
75 * storing the single filter in dfilter, to avoid using dynamic memory.
76 */
77
/* per-socket state for CAN_RAW; struct sock must stay the first member
 * so that raw_sk() can cast between the two */
struct raw_sock {
	struct sock sk;
	int bound;			/* bind/connect done */
	int ifindex;			/* bound interface, 0 = any */
	struct notifier_block notifier;	/* netdevice event hook */
	int loopback;			/* echo sent frames locally */
	int recv_own_msgs;		/* receive own sent frames */
	int count;                 /* number of active filters */
	struct can_filter dfilter; /* default/single filter */
	struct can_filter *filter; /* pointer to filter(s) */
	can_err_mask_t err_mask;	/* subscribed error classes */
};
90
/* cast a struct sock to its containing raw_sock (sk is the first member) */
static inline struct raw_sock *raw_sk(const struct sock *sk)
{
	return (struct raw_sock *)sk;
}
95
/*
 * raw_rcv - per-filter receive callback registered via can_rx_register()
 *
 * @data is the receiving struct sock. Queues the skb on the socket's
 * receive queue (or frees it when the queue rejects it or the frame is
 * our own echo and recv_own_msgs is off).
 */
static void raw_rcv(struct sk_buff *skb, void *data)
{
	struct sock *sk = (struct sock *)data;
	struct raw_sock *ro = raw_sk(sk);
	struct sockaddr_can *addr;
	int error;

	if (!ro->recv_own_msgs) {
		/* check the received tx sock reference */
		if (skb->sk == sk) {
			kfree_skb(skb);
			return;
		}
	}

	/*
	 *  Put the datagram to the queue so that raw_recvmsg() can
	 *  get it from there.  We need to pass the interface index to
	 *  raw_recvmsg().  We pass a whole struct sockaddr_can in skb->cb
	 *  containing the interface index.
	 */

	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
	addr = (struct sockaddr_can *)skb->cb;
	memset(addr, 0, sizeof(*addr));
	addr->can_family  = AF_CAN;
	addr->can_ifindex = skb->dev->ifindex;

	error = sock_queue_rcv_skb(sk, skb);
	if (error < 0)
		kfree_skb(skb);
}
128
/*
 * raw_enable_filters - register raw_rcv() for each filter in @filter
 *
 * All-or-nothing: on a registration failure, every filter registered so
 * far is rolled back and the error is returned.
 */
static int raw_enable_filters(struct net_device *dev, struct sock *sk,
			      struct can_filter *filter,
			      int count)
{
	int err = 0;
	int i;

	for (i = 0; i < count; i++) {
		err = can_rx_register(dev, filter[i].can_id,
				      filter[i].can_mask,
				      raw_rcv, sk, "raw");
		if (err) {
			/* clean up successfully registered filters */
			while (--i >= 0)
				can_rx_unregister(dev, filter[i].can_id,
						  filter[i].can_mask,
						  raw_rcv, sk);
			break;
		}
	}

	return err;
}
152
153static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
154 can_err_mask_t err_mask)
155{
156 int err = 0;
157
158 if (err_mask)
159 err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
160 raw_rcv, sk, "raw");
161
162 return err;
163}
164
165static void raw_disable_filters(struct net_device *dev, struct sock *sk,
166 struct can_filter *filter,
167 int count)
168{
169 int i;
170
171 for (i = 0; i < count; i++)
172 can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
173 raw_rcv, sk);
174}
175
176static inline void raw_disable_errfilter(struct net_device *dev,
177 struct sock *sk,
178 can_err_mask_t err_mask)
179
180{
181 if (err_mask)
182 can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
183 raw_rcv, sk);
184}
185
/* drop all of the socket's subscriptions (data filters + error filter) */
static inline void raw_disable_allfilters(struct net_device *dev,
					  struct sock *sk)
{
	struct raw_sock *ro = raw_sk(sk);

	raw_disable_filters(dev, sk, ro->filter, ro->count);
	raw_disable_errfilter(dev, sk, ro->err_mask);
}
194
195static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
196{
197 struct raw_sock *ro = raw_sk(sk);
198 int err;
199
200 err = raw_enable_filters(dev, sk, ro->filter, ro->count);
201 if (!err) {
202 err = raw_enable_errfilter(dev, sk, ro->err_mask);
203 if (err)
204 raw_disable_filters(dev, sk, ro->filter, ro->count);
205 }
206
207 return err;
208}
209
/*
 * Netdevice notifier callback: reacts to state changes of the single
 * CAN interface this socket is bound to.  NETDEV_UNREGISTER removes
 * all filter registrations and effectively unbinds the socket (with
 * ENODEV reported to the user); NETDEV_DOWN only reports ENETDOWN.
 */
static int raw_notifier(struct notifier_block *nb,
			unsigned long msg, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
	struct sock *sk = &ro->sk;

	/* CAN sockets only operate in the initial network namespace */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	if (dev->type != ARPHRD_CAN)
		return NOTIFY_DONE;

	/* only events for the interface we are bound to matter */
	if (ro->ifindex != dev->ifindex)
		return NOTIFY_DONE;

	switch (msg) {

	case NETDEV_UNREGISTER:
		lock_sock(sk);
		/* remove current filters & unregister */
		if (ro->bound)
			raw_disable_allfilters(dev, sk);

		/* only a list of more than one filter was kmalloc'ed;
		 * a single filter lives in the embedded ro->dfilter */
		if (ro->count > 1)
			kfree(ro->filter);

		ro->ifindex = 0;
		ro->bound = 0;
		ro->count = 0;
		release_sock(sk);

		sk->sk_err = ENODEV;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
		break;

	case NETDEV_DOWN:
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
		break;
	}

	return NOTIFY_DONE;
}
256
/*
 * Per-socket initialization (struct proto .init): set up the default
 * single accept-everything filter, default loopback behaviour and
 * hook the socket into the netdevice notifier chain.  Always returns 0.
 */
static int raw_init(struct sock *sk)
{
	struct raw_sock *ro = raw_sk(sk);

	ro->bound = 0;
	ro->ifindex = 0;

	/* set default filter to single entry dfilter */
	ro->dfilter.can_id = 0;
	ro->dfilter.can_mask = MASK_ALL;
	ro->filter = &ro->dfilter;
	ro->count = 1;

	/* set default loopback behaviour */
	ro->loopback = 1;
	ro->recv_own_msgs = 0;

	/* set notifier */
	ro->notifier.notifier_call = raw_notifier;

	/* NOTE(review): return value ignored — a (rare) registration
	 * failure would leave the socket without UNREGISTER/DOWN events */
	register_netdevice_notifier(&ro->notifier);

	return 0;
}
281
/*
 * Close the socket: detach from the netdevice notifier chain, drop
 * all filter registrations on the bound device (or on all devices if
 * bound to ifindex 0), free a dynamically allocated filter list and
 * release the final socket reference.
 */
static int raw_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct raw_sock *ro = raw_sk(sk);

	unregister_netdevice_notifier(&ro->notifier);

	lock_sock(sk);

	/* remove current filters & unregister */
	if (ro->bound) {
		if (ro->ifindex) {
			struct net_device *dev;

			/* the device may already be gone; skip silently */
			dev = dev_get_by_index(&init_net, ro->ifindex);
			if (dev) {
				raw_disable_allfilters(dev, sk);
				dev_put(dev);
			}
		} else
			raw_disable_allfilters(NULL, sk);
	}

	/* a single filter lives in ro->dfilter; only lists of more than
	 * one entry were kmalloc'ed */
	if (ro->count > 1)
		kfree(ro->filter);

	ro->ifindex = 0;
	ro->bound = 0;
	ro->count = 0;

	release_sock(sk);
	sock_put(sk);

	return 0;
}
317
/*
 * Bind the socket to one CAN interface (or to all interfaces when
 * addr->can_ifindex == 0) and install the socket's current filter set
 * there.  The new registrations are made before the old ones are
 * dropped, so a failure leaves the previous binding fully intact.
 * Rebinding to the same interface is a no-op.
 */
static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
{
	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
	struct sock *sk = sock->sk;
	struct raw_sock *ro = raw_sk(sk);
	int ifindex;
	int err = 0;
	int notify_enetdown = 0;

	if (len < sizeof(*addr))
		return -EINVAL;

	lock_sock(sk);

	/* already bound to exactly this interface? nothing to do */
	if (ro->bound && addr->can_ifindex == ro->ifindex)
		goto out;

	if (addr->can_ifindex) {
		struct net_device *dev;

		dev = dev_get_by_index(&init_net, addr->can_ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out;
		}
		if (dev->type != ARPHRD_CAN) {
			dev_put(dev);
			err = -ENODEV;
			goto out;
		}
		/* binding to a down device succeeds, but ENETDOWN is
		 * signalled to the user once the socket lock is dropped */
		if (!(dev->flags & IFF_UP))
			notify_enetdown = 1;

		ifindex = dev->ifindex;

		/* filters set by default/setsockopt */
		err = raw_enable_allfilters(dev, sk);
		dev_put(dev);

	} else {
		/* ifindex 0: receive from every CAN interface */
		ifindex = 0;

		/* filters set by default/setsockopt */
		err = raw_enable_allfilters(NULL, sk);
	}

	if (!err) {
		if (ro->bound) {
			/* unregister old filters */
			if (ro->ifindex) {
				struct net_device *dev;

				/* old device may have vanished meanwhile */
				dev = dev_get_by_index(&init_net, ro->ifindex);
				if (dev) {
					raw_disable_allfilters(dev, sk);
					dev_put(dev);
				}
			} else
				raw_disable_allfilters(NULL, sk);
		}
		ro->ifindex = ifindex;
		ro->bound = 1;
	}

 out:
	release_sock(sk);

	if (notify_enetdown) {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

	return err;
}
393
394static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
395 int *len, int peer)
396{
397 struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
398 struct sock *sk = sock->sk;
399 struct raw_sock *ro = raw_sk(sk);
400
401 if (peer)
402 return -EOPNOTSUPP;
403
404 addr->can_family = AF_CAN;
405 addr->can_ifindex = ro->ifindex;
406
407 *len = sizeof(*addr);
408
409 return 0;
410}
411
412static int raw_setsockopt(struct socket *sock, int level, int optname,
413 char __user *optval, int optlen)
414{
415 struct sock *sk = sock->sk;
416 struct raw_sock *ro = raw_sk(sk);
417 struct can_filter *filter = NULL; /* dyn. alloc'ed filters */
418 struct can_filter sfilter; /* single filter */
419 struct net_device *dev = NULL;
420 can_err_mask_t err_mask = 0;
421 int count = 0;
422 int err = 0;
423
424 if (level != SOL_CAN_RAW)
425 return -EINVAL;
426 if (optlen < 0)
427 return -EINVAL;
428
429 switch (optname) {
430
431 case CAN_RAW_FILTER:
432 if (optlen % sizeof(struct can_filter) != 0)
433 return -EINVAL;
434
435 count = optlen / sizeof(struct can_filter);
436
437 if (count > 1) {
438 /* filter does not fit into dfilter => alloc space */
439 filter = kmalloc(optlen, GFP_KERNEL);
440 if (!filter)
441 return -ENOMEM;
442
443 err = copy_from_user(filter, optval, optlen);
444 if (err) {
445 kfree(filter);
446 return err;
447 }
448 } else if (count == 1) {
449 err = copy_from_user(&sfilter, optval, optlen);
450 if (err)
451 return err;
452 }
453
454 lock_sock(sk);
455
456 if (ro->bound && ro->ifindex)
457 dev = dev_get_by_index(&init_net, ro->ifindex);
458
459 if (ro->bound) {
460 /* (try to) register the new filters */
461 if (count == 1)
462 err = raw_enable_filters(dev, sk, &sfilter, 1);
463 else
464 err = raw_enable_filters(dev, sk, filter,
465 count);
466 if (err) {
467 if (count > 1)
468 kfree(filter);
469
470 goto out_fil;
471 }
472
473 /* remove old filter registrations */
474 raw_disable_filters(dev, sk, ro->filter, ro->count);
475 }
476
477 /* remove old filter space */
478 if (ro->count > 1)
479 kfree(ro->filter);
480
481 /* link new filters to the socket */
482 if (count == 1) {
483 /* copy filter data for single filter */
484 ro->dfilter = sfilter;
485 filter = &ro->dfilter;
486 }
487 ro->filter = filter;
488 ro->count = count;
489
490 out_fil:
491 if (dev)
492 dev_put(dev);
493
494 release_sock(sk);
495
496 break;
497
498 case CAN_RAW_ERR_FILTER:
499 if (optlen != sizeof(err_mask))
500 return -EINVAL;
501
502 err = copy_from_user(&err_mask, optval, optlen);
503 if (err)
504 return err;
505
506 err_mask &= CAN_ERR_MASK;
507
508 lock_sock(sk);
509
510 if (ro->bound && ro->ifindex)
511 dev = dev_get_by_index(&init_net, ro->ifindex);
512
513 /* remove current error mask */
514 if (ro->bound) {
515 /* (try to) register the new err_mask */
516 err = raw_enable_errfilter(dev, sk, err_mask);
517
518 if (err)
519 goto out_err;
520
521 /* remove old err_mask registration */
522 raw_disable_errfilter(dev, sk, ro->err_mask);
523 }
524
525 /* link new err_mask to the socket */
526 ro->err_mask = err_mask;
527
528 out_err:
529 if (dev)
530 dev_put(dev);
531
532 release_sock(sk);
533
534 break;
535
536 case CAN_RAW_LOOPBACK:
537 if (optlen != sizeof(ro->loopback))
538 return -EINVAL;
539
540 err = copy_from_user(&ro->loopback, optval, optlen);
541
542 break;
543
544 case CAN_RAW_RECV_OWN_MSGS:
545 if (optlen != sizeof(ro->recv_own_msgs))
546 return -EINVAL;
547
548 err = copy_from_user(&ro->recv_own_msgs, optval, optlen);
549
550 break;
551
552 default:
553 return -ENOPROTOOPT;
554 }
555 return err;
556}
557
558static int raw_getsockopt(struct socket *sock, int level, int optname,
559 char __user *optval, int __user *optlen)
560{
561 struct sock *sk = sock->sk;
562 struct raw_sock *ro = raw_sk(sk);
563 int len;
564 void *val;
565 int err = 0;
566
567 if (level != SOL_CAN_RAW)
568 return -EINVAL;
569 if (get_user(len, optlen))
570 return -EFAULT;
571 if (len < 0)
572 return -EINVAL;
573
574 switch (optname) {
575
576 case CAN_RAW_FILTER:
577 lock_sock(sk);
578 if (ro->count > 0) {
579 int fsize = ro->count * sizeof(struct can_filter);
580 if (len > fsize)
581 len = fsize;
582 err = copy_to_user(optval, ro->filter, len);
583 } else
584 len = 0;
585 release_sock(sk);
586
587 if (!err)
588 err = put_user(len, optlen);
589 return err;
590
591 case CAN_RAW_ERR_FILTER:
592 if (len > sizeof(can_err_mask_t))
593 len = sizeof(can_err_mask_t);
594 val = &ro->err_mask;
595 break;
596
597 case CAN_RAW_LOOPBACK:
598 if (len > sizeof(int))
599 len = sizeof(int);
600 val = &ro->loopback;
601 break;
602
603 case CAN_RAW_RECV_OWN_MSGS:
604 if (len > sizeof(int))
605 len = sizeof(int);
606 val = &ro->recv_own_msgs;
607 break;
608
609 default:
610 return -ENOPROTOOPT;
611 }
612
613 if (put_user(len, optlen))
614 return -EFAULT;
615 if (copy_to_user(optval, val, len))
616 return -EFAULT;
617 return 0;
618}
619
620static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
621 struct msghdr *msg, size_t size)
622{
623 struct sock *sk = sock->sk;
624 struct raw_sock *ro = raw_sk(sk);
625 struct sk_buff *skb;
626 struct net_device *dev;
627 int ifindex;
628 int err;
629
630 if (msg->msg_name) {
631 struct sockaddr_can *addr =
632 (struct sockaddr_can *)msg->msg_name;
633
634 if (addr->can_family != AF_CAN)
635 return -EINVAL;
636
637 ifindex = addr->can_ifindex;
638 } else
639 ifindex = ro->ifindex;
640
641 dev = dev_get_by_index(&init_net, ifindex);
642 if (!dev)
643 return -ENXIO;
644
645 skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
646 &err);
647 if (!skb) {
648 dev_put(dev);
649 return err;
650 }
651
652 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
653 if (err < 0) {
654 kfree_skb(skb);
655 dev_put(dev);
656 return err;
657 }
658 skb->dev = dev;
659 skb->sk = sk;
660
661 err = can_send(skb, ro->loopback);
662
663 dev_put(dev);
664
665 if (err)
666 return err;
667
668 return size;
669}
670
/*
 * Receive one CAN frame.  The originating address was stashed in
 * skb->cb as a struct sockaddr_can by raw_rcv(), so it can be copied
 * straight into msg_name here.  Returns the number of bytes copied
 * (flagging MSG_TRUNC if the buffer was smaller than the frame) or a
 * negative errno.
 */
static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
		       struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int error = 0;
	int noblock;

	noblock = flags & MSG_DONTWAIT;
	flags &= ~MSG_DONTWAIT;

	skb = skb_recv_datagram(sk, flags, noblock, &error);
	if (!skb)
		return error;

	/* undersized buffer: copy what fits and flag the truncation */
	if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;
	else
		size = skb->len;

	error = memcpy_toiovec(msg->msg_iov, skb->data, size);
	if (error < 0) {
		skb_free_datagram(sk, skb);
		return error;
	}

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name) {
		/* address prepared in skb->cb by raw_rcv() */
		msg->msg_namelen = sizeof(struct sockaddr_can);
		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
	}

	skb_free_datagram(sk, skb);

	return size;
}
708
/*
 * Socket-layer operations for CAN_RAW sockets.  Operations that make
 * no sense for a datagram-style CAN socket are wired to the generic
 * sock_no_*() stubs; ioctl is left NULL so the common CAN ioctl
 * handling in af_can.c takes over.
 */
static struct proto_ops raw_ops __read_mostly = {
	.family        = PF_CAN,
	.release       = raw_release,
	.bind          = raw_bind,
	.connect       = sock_no_connect,
	.socketpair    = sock_no_socketpair,
	.accept        = sock_no_accept,
	.getname       = raw_getname,
	.poll          = datagram_poll,
	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
	.listen        = sock_no_listen,
	.shutdown      = sock_no_shutdown,
	.setsockopt    = raw_setsockopt,
	.getsockopt    = raw_getsockopt,
	.sendmsg       = raw_sendmsg,
	.recvmsg       = raw_recvmsg,
	.mmap          = sock_no_mmap,
	.sendpage      = sock_no_sendpage,
};
728
/* proto definition: sizes per-socket state and runs raw_init() on
 * socket creation */
static struct proto raw_proto __read_mostly = {
	.name       = "CAN_RAW",
	.owner      = THIS_MODULE,
	.obj_size   = sizeof(struct raw_sock),
	.init       = raw_init,
};

/* registration record handed to the CAN core: SOCK_RAW sockets with
 * protocol CAN_RAW are served by the ops/proto above.
 * NOTE(review): .capability = -1 presumably means no extra capability
 * is required to create such a socket — confirm against af_can.c */
static struct can_proto raw_can_proto __read_mostly = {
	.type       = SOCK_RAW,
	.protocol   = CAN_RAW,
	.capability = -1,
	.ops        = &raw_ops,
	.prot       = &raw_proto,
};
743
/*
 * Module entry point: print the banner and register the raw protocol
 * with the CAN core; propagates the registration result.
 */
static __init int raw_module_init(void)
{
	printk(banner);

	int err;

	err = can_proto_register(&raw_can_proto);
	if (err < 0)
		printk(KERN_ERR "can: registration of raw protocol failed\n");

	return err;
}
756
/* Module exit point: detach the raw protocol from the CAN core. */
static __exit void raw_module_exit(void)
{
	can_proto_unregister(&raw_can_proto);
}

module_init(raw_module_init);
module_exit(raw_module_exit);
diff --git a/net/compat.c b/net/compat.c
index d74d82155d78..80013fb69a61 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -20,7 +20,6 @@
20#include <linux/syscalls.h> 20#include <linux/syscalls.h>
21#include <linux/filter.h> 21#include <linux/filter.h>
22#include <linux/compat.h> 22#include <linux/compat.h>
23#include <linux/netfilter_ipv4/ip_tables.h>
24#include <linux/security.h> 23#include <linux/security.h>
25 24
26#include <net/scm.h> 25#include <net/scm.h>
@@ -254,6 +253,8 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
254 if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr))) 253 if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
255 return -EFAULT; 254 return -EFAULT;
256 cmlen = CMSG_COMPAT_SPACE(len); 255 cmlen = CMSG_COMPAT_SPACE(len);
256 if (kmsg->msg_controllen < cmlen)
257 cmlen = kmsg->msg_controllen;
257 kmsg->msg_control += cmlen; 258 kmsg->msg_control += cmlen;
258 kmsg->msg_controllen -= cmlen; 259 kmsg->msg_controllen -= cmlen;
259 return 0; 260 return 0;
@@ -315,107 +316,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
315} 316}
316 317
317/* 318/*
318 * For now, we assume that the compatibility and native version
319 * of struct ipt_entry are the same - sfr. FIXME
320 */
321struct compat_ipt_replace {
322 char name[IPT_TABLE_MAXNAMELEN];
323 u32 valid_hooks;
324 u32 num_entries;
325 u32 size;
326 u32 hook_entry[NF_IP_NUMHOOKS];
327 u32 underflow[NF_IP_NUMHOOKS];
328 u32 num_counters;
329 compat_uptr_t counters; /* struct ipt_counters * */
330 struct ipt_entry entries[0];
331};
332
333static int do_netfilter_replace(int fd, int level, int optname,
334 char __user *optval, int optlen)
335{
336 struct compat_ipt_replace __user *urepl;
337 struct ipt_replace __user *repl_nat;
338 char name[IPT_TABLE_MAXNAMELEN];
339 u32 origsize, tmp32, num_counters;
340 unsigned int repl_nat_size;
341 int ret;
342 int i;
343 compat_uptr_t ucntrs;
344
345 urepl = (struct compat_ipt_replace __user *)optval;
346 if (get_user(origsize, &urepl->size))
347 return -EFAULT;
348
349 /* Hack: Causes ipchains to give correct error msg --RR */
350 if (optlen != sizeof(*urepl) + origsize)
351 return -ENOPROTOOPT;
352
353 /* XXX Assumes that size of ipt_entry is the same both in
354 * native and compat environments.
355 */
356 repl_nat_size = sizeof(*repl_nat) + origsize;
357 repl_nat = compat_alloc_user_space(repl_nat_size);
358
359 ret = -EFAULT;
360 if (put_user(origsize, &repl_nat->size))
361 goto out;
362
363 if (!access_ok(VERIFY_READ, urepl, optlen) ||
364 !access_ok(VERIFY_WRITE, repl_nat, optlen))
365 goto out;
366
367 if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
368 __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
369 goto out;
370
371 if (__get_user(tmp32, &urepl->valid_hooks) ||
372 __put_user(tmp32, &repl_nat->valid_hooks))
373 goto out;
374
375 if (__get_user(tmp32, &urepl->num_entries) ||
376 __put_user(tmp32, &repl_nat->num_entries))
377 goto out;
378
379 if (__get_user(num_counters, &urepl->num_counters) ||
380 __put_user(num_counters, &repl_nat->num_counters))
381 goto out;
382
383 if (__get_user(ucntrs, &urepl->counters) ||
384 __put_user(compat_ptr(ucntrs), &repl_nat->counters))
385 goto out;
386
387 if (__copy_in_user(&repl_nat->entries[0],
388 &urepl->entries[0],
389 origsize))
390 goto out;
391
392 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
393 if (__get_user(tmp32, &urepl->hook_entry[i]) ||
394 __put_user(tmp32, &repl_nat->hook_entry[i]) ||
395 __get_user(tmp32, &urepl->underflow[i]) ||
396 __put_user(tmp32, &repl_nat->underflow[i]))
397 goto out;
398 }
399
400 /*
401 * Since struct ipt_counters just contains two u_int64_t members
402 * we can just do the access_ok check here and pass the (converted)
403 * pointer into the standard syscall. We hope that the pointer is
404 * not misaligned ...
405 */
406 if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
407 num_counters * sizeof(struct ipt_counters)))
408 goto out;
409
410
411 ret = sys_setsockopt(fd, level, optname,
412 (char __user *)repl_nat, repl_nat_size);
413
414out:
415 return ret;
416}
417
418/*
419 * A struct sock_filter is architecture independent. 319 * A struct sock_filter is architecture independent.
420 */ 320 */
421struct compat_sock_fprog { 321struct compat_sock_fprog {
@@ -483,10 +383,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
483 int err; 383 int err;
484 struct socket *sock; 384 struct socket *sock;
485 385
486 if (level == SOL_IPV6 && optname == IPT_SO_SET_REPLACE)
487 return do_netfilter_replace(fd, level, optname,
488 optval, optlen);
489
490 if (optlen < 0) 386 if (optlen < 0)
491 return -EINVAL; 387 return -EINVAL;
492 388
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 029b93e246b4..8a28fc93b724 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -115,10 +115,10 @@ out_noerr:
115} 115}
116 116
117/** 117/**
118 * skb_recv_datagram - Receive a datagram skbuff 118 * __skb_recv_datagram - Receive a datagram skbuff
119 * @sk: socket 119 * @sk: socket
120 * @flags: MSG_ flags 120 * @flags: MSG_ flags
121 * @noblock: blocking operation? 121 * @peeked: returns non-zero if this packet has been seen before
122 * @err: error code returned 122 * @err: error code returned
123 * 123 *
124 * Get a datagram skbuff, understands the peeking, nonblocking wakeups 124 * Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +143,8 @@ out_noerr:
143 * quite explicitly by POSIX 1003.1g, don't change them without having 143 * quite explicitly by POSIX 1003.1g, don't change them without having
144 * the standard around please. 144 * the standard around please.
145 */ 145 */
146struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, 146struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
147 int noblock, int *err) 147 int *peeked, int *err)
148{ 148{
149 struct sk_buff *skb; 149 struct sk_buff *skb;
150 long timeo; 150 long timeo;
@@ -156,7 +156,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
156 if (error) 156 if (error)
157 goto no_packet; 157 goto no_packet;
158 158
159 timeo = sock_rcvtimeo(sk, noblock); 159 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
160 160
161 do { 161 do {
162 /* Again only user level code calls this function, so nothing 162 /* Again only user level code calls this function, so nothing
@@ -165,18 +165,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
165 * Look at current nfs client by the way... 165 * Look at current nfs client by the way...
166 * However, this function was corrent in any case. 8) 166 * However, this function was corrent in any case. 8)
167 */ 167 */
168 if (flags & MSG_PEEK) { 168 unsigned long cpu_flags;
169 unsigned long cpu_flags; 169
170 170 spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
171 spin_lock_irqsave(&sk->sk_receive_queue.lock, 171 skb = skb_peek(&sk->sk_receive_queue);
172 cpu_flags); 172 if (skb) {
173 skb = skb_peek(&sk->sk_receive_queue); 173 *peeked = skb->peeked;
174 if (skb) 174 if (flags & MSG_PEEK) {
175 skb->peeked = 1;
175 atomic_inc(&skb->users); 176 atomic_inc(&skb->users);
176 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, 177 } else
177 cpu_flags); 178 __skb_unlink(skb, &sk->sk_receive_queue);
178 } else 179 }
179 skb = skb_dequeue(&sk->sk_receive_queue); 180 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
180 181
181 if (skb) 182 if (skb)
182 return skb; 183 return skb;
@@ -194,10 +195,21 @@ no_packet:
194 *err = error; 195 *err = error;
195 return NULL; 196 return NULL;
196} 197}
198EXPORT_SYMBOL(__skb_recv_datagram);
199
200struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
201 int noblock, int *err)
202{
203 int peeked;
204
205 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
206 &peeked, err);
207}
197 208
198void skb_free_datagram(struct sock *sk, struct sk_buff *skb) 209void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
199{ 210{
200 kfree_skb(skb); 211 kfree_skb(skb);
212 sk_mem_reclaim(sk);
201} 213}
202 214
203/** 215/**
@@ -217,20 +229,28 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
217 * This function currently only disables BH when acquiring the 229 * This function currently only disables BH when acquiring the
218 * sk_receive_queue lock. Therefore it must not be used in a 230 * sk_receive_queue lock. Therefore it must not be used in a
219 * context where that lock is acquired in an IRQ context. 231 * context where that lock is acquired in an IRQ context.
232 *
233 * It returns 0 if the packet was removed by us.
220 */ 234 */
221 235
222void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) 236int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
223{ 237{
238 int err = 0;
239
224 if (flags & MSG_PEEK) { 240 if (flags & MSG_PEEK) {
241 err = -ENOENT;
225 spin_lock_bh(&sk->sk_receive_queue.lock); 242 spin_lock_bh(&sk->sk_receive_queue.lock);
226 if (skb == skb_peek(&sk->sk_receive_queue)) { 243 if (skb == skb_peek(&sk->sk_receive_queue)) {
227 __skb_unlink(skb, &sk->sk_receive_queue); 244 __skb_unlink(skb, &sk->sk_receive_queue);
228 atomic_dec(&skb->users); 245 atomic_dec(&skb->users);
246 err = 0;
229 } 247 }
230 spin_unlock_bh(&sk->sk_receive_queue.lock); 248 spin_unlock_bh(&sk->sk_receive_queue.lock);
231 } 249 }
232 250
233 kfree_skb(skb); 251 kfree_skb(skb);
252 sk_mem_reclaim(sk);
253 return err;
234} 254}
235 255
236EXPORT_SYMBOL(skb_kill_datagram); 256EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 853c8b575f1d..9549417250bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,8 +150,11 @@
150 * 86DD IPv6 150 * 86DD IPv6
151 */ 151 */
152 152
153#define PTYPE_HASH_SIZE (16)
154#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
155
153static DEFINE_SPINLOCK(ptype_lock); 156static DEFINE_SPINLOCK(ptype_lock);
154static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ 157static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
155static struct list_head ptype_all __read_mostly; /* Taps */ 158static struct list_head ptype_all __read_mostly; /* Taps */
156 159
157#ifdef CONFIG_NET_DMA 160#ifdef CONFIG_NET_DMA
@@ -362,7 +365,7 @@ void dev_add_pack(struct packet_type *pt)
362 if (pt->type == htons(ETH_P_ALL)) 365 if (pt->type == htons(ETH_P_ALL))
363 list_add_rcu(&pt->list, &ptype_all); 366 list_add_rcu(&pt->list, &ptype_all);
364 else { 367 else {
365 hash = ntohs(pt->type) & 15; 368 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
366 list_add_rcu(&pt->list, &ptype_base[hash]); 369 list_add_rcu(&pt->list, &ptype_base[hash]);
367 } 370 }
368 spin_unlock_bh(&ptype_lock); 371 spin_unlock_bh(&ptype_lock);
@@ -391,7 +394,7 @@ void __dev_remove_pack(struct packet_type *pt)
391 if (pt->type == htons(ETH_P_ALL)) 394 if (pt->type == htons(ETH_P_ALL))
392 head = &ptype_all; 395 head = &ptype_all;
393 else 396 else
394 head = &ptype_base[ntohs(pt->type) & 15]; 397 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
395 398
396 list_for_each_entry(pt1, head, list) { 399 list_for_each_entry(pt1, head, list) {
397 if (pt == pt1) { 400 if (pt == pt1) {
@@ -672,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h
672 675
673 ASSERT_RTNL(); 676 ASSERT_RTNL();
674 677
675 for_each_netdev(&init_net, dev) 678 for_each_netdev(net, dev)
676 if (dev->type == type && 679 if (dev->type == type &&
677 !memcmp(dev->dev_addr, ha, dev->addr_len)) 680 !memcmp(dev->dev_addr, ha, dev->addr_len))
678 return dev; 681 return dev;
@@ -1171,6 +1174,8 @@ rollback:
1171 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1174 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1172 } 1175 }
1173 } 1176 }
1177
1178 raw_notifier_chain_unregister(&netdev_chain, nb);
1174 goto unlock; 1179 goto unlock;
1175} 1180}
1176 1181
@@ -1418,7 +1423,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1418 } 1423 }
1419 1424
1420 rcu_read_lock(); 1425 rcu_read_lock();
1421 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { 1426 list_for_each_entry_rcu(ptype,
1427 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1422 if (ptype->type == type && !ptype->dev && ptype->gso_segment) { 1428 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1423 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 1429 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1424 err = ptype->gso_send_check(skb); 1430 err = ptype->gso_send_check(skb);
@@ -1751,9 +1757,6 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1751 * 1757 *
1752 * return values: 1758 * return values:
1753 * NET_RX_SUCCESS (no congestion) 1759 * NET_RX_SUCCESS (no congestion)
1754 * NET_RX_CN_LOW (low congestion)
1755 * NET_RX_CN_MOD (moderate congestion)
1756 * NET_RX_CN_HIGH (high congestion)
1757 * NET_RX_DROP (packet was dropped) 1760 * NET_RX_DROP (packet was dropped)
1758 * 1761 *
1759 */ 1762 */
@@ -2001,6 +2004,21 @@ out:
2001} 2004}
2002#endif 2005#endif
2003 2006
2007/**
2008 * netif_receive_skb - process receive buffer from network
2009 * @skb: buffer to process
2010 *
2011 * netif_receive_skb() is the main receive data processing function.
2012 * It always succeeds. The buffer may be dropped during processing
2013 * for congestion control or by the protocol layers.
2014 *
2015 * This function may only be called from softirq context and interrupts
2016 * should be enabled.
2017 *
2018 * Return values (usually ignored):
2019 * NET_RX_SUCCESS: no congestion
2020 * NET_RX_DROP: packet was dropped
2021 */
2004int netif_receive_skb(struct sk_buff *skb) 2022int netif_receive_skb(struct sk_buff *skb)
2005{ 2023{
2006 struct packet_type *ptype, *pt_prev; 2024 struct packet_type *ptype, *pt_prev;
@@ -2063,7 +2081,8 @@ ncls:
2063 goto out; 2081 goto out;
2064 2082
2065 type = skb->protocol; 2083 type = skb->protocol;
2066 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { 2084 list_for_each_entry_rcu(ptype,
2085 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2067 if (ptype->type == type && 2086 if (ptype->type == type &&
2068 (!ptype->dev || ptype->dev == skb->dev)) { 2087 (!ptype->dev || ptype->dev == skb->dev)) {
2069 if (pt_prev) 2088 if (pt_prev)
@@ -2172,7 +2191,15 @@ static void net_rx_action(struct softirq_action *h)
2172 2191
2173 weight = n->weight; 2192 weight = n->weight;
2174 2193
2175 work = n->poll(n, weight); 2194 /* This NAPI_STATE_SCHED test is for avoiding a race
2195 * with netpoll's poll_napi(). Only the entity which
2196 * obtains the lock and sees NAPI_STATE_SCHED set will
2197 * actually make the ->poll() call. Therefore we avoid
2198 * accidently calling ->poll() when NAPI is not scheduled.
2199 */
2200 work = 0;
2201 if (test_bit(NAPI_STATE_SCHED, &n->state))
2202 work = n->poll(n, weight);
2176 2203
2177 WARN_ON_ONCE(work > weight); 2204 WARN_ON_ONCE(work > weight);
2178 2205
@@ -2185,8 +2212,12 @@ static void net_rx_action(struct softirq_action *h)
2185 * still "owns" the NAPI instance and therefore can 2212 * still "owns" the NAPI instance and therefore can
2186 * move the instance around on the list at-will. 2213 * move the instance around on the list at-will.
2187 */ 2214 */
2188 if (unlikely(work == weight)) 2215 if (unlikely(work == weight)) {
2189 list_move_tail(&n->poll_list, list); 2216 if (unlikely(napi_disable_pending(n)))
2217 __napi_complete(n);
2218 else
2219 list_move_tail(&n->poll_list, list);
2220 }
2190 2221
2191 netpoll_poll_unlock(have); 2222 netpoll_poll_unlock(have);
2192 } 2223 }
@@ -2337,8 +2368,9 @@ static int dev_ifconf(struct net *net, char __user *arg)
2337 * in detail. 2368 * in detail.
2338 */ 2369 */
2339void *dev_seq_start(struct seq_file *seq, loff_t *pos) 2370void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2371 __acquires(dev_base_lock)
2340{ 2372{
2341 struct net *net = seq->private; 2373 struct net *net = seq_file_net(seq);
2342 loff_t off; 2374 loff_t off;
2343 struct net_device *dev; 2375 struct net_device *dev;
2344 2376
@@ -2356,13 +2388,14 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2356 2388
2357void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2389void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2358{ 2390{
2359 struct net *net = seq->private; 2391 struct net *net = seq_file_net(seq);
2360 ++*pos; 2392 ++*pos;
2361 return v == SEQ_START_TOKEN ? 2393 return v == SEQ_START_TOKEN ?
2362 first_net_device(net) : next_net_device((struct net_device *)v); 2394 first_net_device(net) : next_net_device((struct net_device *)v);
2363} 2395}
2364 2396
2365void dev_seq_stop(struct seq_file *seq, void *v) 2397void dev_seq_stop(struct seq_file *seq, void *v)
2398 __releases(dev_base_lock)
2366{ 2399{
2367 read_unlock(&dev_base_lock); 2400 read_unlock(&dev_base_lock);
2368} 2401}
@@ -2455,26 +2488,8 @@ static const struct seq_operations dev_seq_ops = {
2455 2488
2456static int dev_seq_open(struct inode *inode, struct file *file) 2489static int dev_seq_open(struct inode *inode, struct file *file)
2457{ 2490{
2458 struct seq_file *seq; 2491 return seq_open_net(inode, file, &dev_seq_ops,
2459 int res; 2492 sizeof(struct seq_net_private));
2460 res = seq_open(file, &dev_seq_ops);
2461 if (!res) {
2462 seq = file->private_data;
2463 seq->private = get_proc_net(inode);
2464 if (!seq->private) {
2465 seq_release(inode, file);
2466 res = -ENXIO;
2467 }
2468 }
2469 return res;
2470}
2471
2472static int dev_seq_release(struct inode *inode, struct file *file)
2473{
2474 struct seq_file *seq = file->private_data;
2475 struct net *net = seq->private;
2476 put_net(net);
2477 return seq_release(inode, file);
2478} 2493}
2479 2494
2480static const struct file_operations dev_seq_fops = { 2495static const struct file_operations dev_seq_fops = {
@@ -2482,7 +2497,7 @@ static const struct file_operations dev_seq_fops = {
2482 .open = dev_seq_open, 2497 .open = dev_seq_open,
2483 .read = seq_read, 2498 .read = seq_read,
2484 .llseek = seq_lseek, 2499 .llseek = seq_lseek,
2485 .release = dev_seq_release, 2500 .release = seq_release_net,
2486}; 2501};
2487 2502
2488static const struct seq_operations softnet_seq_ops = { 2503static const struct seq_operations softnet_seq_ops = {
@@ -2517,7 +2532,7 @@ static void *ptype_get_idx(loff_t pos)
2517 ++i; 2532 ++i;
2518 } 2533 }
2519 2534
2520 for (t = 0; t < 16; t++) { 2535 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2521 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 2536 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2522 if (i == pos) 2537 if (i == pos)
2523 return pt; 2538 return pt;
@@ -2528,6 +2543,7 @@ static void *ptype_get_idx(loff_t pos)
2528} 2543}
2529 2544
2530static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 2545static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2546 __acquires(RCU)
2531{ 2547{
2532 rcu_read_lock(); 2548 rcu_read_lock();
2533 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; 2549 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
@@ -2551,10 +2567,10 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2551 hash = 0; 2567 hash = 0;
2552 nxt = ptype_base[0].next; 2568 nxt = ptype_base[0].next;
2553 } else 2569 } else
2554 hash = ntohs(pt->type) & 15; 2570 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2555 2571
2556 while (nxt == &ptype_base[hash]) { 2572 while (nxt == &ptype_base[hash]) {
2557 if (++hash >= 16) 2573 if (++hash >= PTYPE_HASH_SIZE)
2558 return NULL; 2574 return NULL;
2559 nxt = ptype_base[hash].next; 2575 nxt = ptype_base[hash].next;
2560 } 2576 }
@@ -2563,6 +2579,7 @@ found:
2563} 2579}
2564 2580
2565static void ptype_seq_stop(struct seq_file *seq, void *v) 2581static void ptype_seq_stop(struct seq_file *seq, void *v)
2582 __releases(RCU)
2566{ 2583{
2567 rcu_read_unlock(); 2584 rcu_read_unlock();
2568} 2585}
@@ -2668,7 +2685,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
2668 proc_net_remove(net, "dev"); 2685 proc_net_remove(net, "dev");
2669} 2686}
2670 2687
2671static struct pernet_operations dev_proc_ops = { 2688static struct pernet_operations __net_initdata dev_proc_ops = {
2672 .init = dev_proc_net_init, 2689 .init = dev_proc_net_init,
2673 .exit = dev_proc_net_exit, 2690 .exit = dev_proc_net_exit,
2674}; 2691};
@@ -2735,12 +2752,15 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
2735 printk(KERN_INFO "device %s %s promiscuous mode\n", 2752 printk(KERN_INFO "device %s %s promiscuous mode\n",
2736 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2753 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2737 "left"); 2754 "left");
2738 audit_log(current->audit_context, GFP_ATOMIC, 2755 if (audit_enabled)
2739 AUDIT_ANOM_PROMISCUOUS, 2756 audit_log(current->audit_context, GFP_ATOMIC,
2740 "dev=%s prom=%d old_prom=%d auid=%u", 2757 AUDIT_ANOM_PROMISCUOUS,
2741 dev->name, (dev->flags & IFF_PROMISC), 2758 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2742 (old_flags & IFF_PROMISC), 2759 dev->name, (dev->flags & IFF_PROMISC),
2743 audit_get_loginuid(current->audit_context)); 2760 (old_flags & IFF_PROMISC),
2761 audit_get_loginuid(current),
2762 current->uid, current->gid,
2763 audit_get_sessionid(current));
2744 2764
2745 if (dev->change_rx_flags) 2765 if (dev->change_rx_flags)
2746 dev->change_rx_flags(dev, IFF_PROMISC); 2766 dev->change_rx_flags(dev, IFF_PROMISC);
@@ -2797,7 +2817,7 @@ void dev_set_allmulti(struct net_device *dev, int inc)
2797/* 2817/*
2798 * Upload unicast and multicast address lists to device and 2818 * Upload unicast and multicast address lists to device and
2799 * configure RX filtering. When the device doesn't support unicast 2819 * configure RX filtering. When the device doesn't support unicast
2800 * filtering it is put in promiscous mode while unicast addresses 2820 * filtering it is put in promiscuous mode while unicast addresses
2801 * are present. 2821 * are present.
2802 */ 2822 */
2803void __dev_set_rx_mode(struct net_device *dev) 2823void __dev_set_rx_mode(struct net_device *dev)
@@ -2945,6 +2965,102 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2945} 2965}
2946EXPORT_SYMBOL(dev_unicast_add); 2966EXPORT_SYMBOL(dev_unicast_add);
2947 2967
2968int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
2969 struct dev_addr_list **from, int *from_count)
2970{
2971 struct dev_addr_list *da, *next;
2972 int err = 0;
2973
2974 da = *from;
2975 while (da != NULL) {
2976 next = da->next;
2977 if (!da->da_synced) {
2978 err = __dev_addr_add(to, to_count,
2979 da->da_addr, da->da_addrlen, 0);
2980 if (err < 0)
2981 break;
2982 da->da_synced = 1;
2983 da->da_users++;
2984 } else if (da->da_users == 1) {
2985 __dev_addr_delete(to, to_count,
2986 da->da_addr, da->da_addrlen, 0);
2987 __dev_addr_delete(from, from_count,
2988 da->da_addr, da->da_addrlen, 0);
2989 }
2990 da = next;
2991 }
2992 return err;
2993}
2994
2995void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
2996 struct dev_addr_list **from, int *from_count)
2997{
2998 struct dev_addr_list *da, *next;
2999
3000 da = *from;
3001 while (da != NULL) {
3002 next = da->next;
3003 if (da->da_synced) {
3004 __dev_addr_delete(to, to_count,
3005 da->da_addr, da->da_addrlen, 0);
3006 da->da_synced = 0;
3007 __dev_addr_delete(from, from_count,
3008 da->da_addr, da->da_addrlen, 0);
3009 }
3010 da = next;
3011 }
3012}
3013
3014/**
3015 * dev_unicast_sync - Synchronize device's unicast list to another device
3016 * @to: destination device
3017 * @from: source device
3018 *
3019 * Add newly added addresses to the destination device and release
3020 * addresses that have no users left. The source device must be
3021 * locked by netif_tx_lock_bh.
3022 *
3023 * This function is intended to be called from the dev->set_rx_mode
3024 * function of layered software devices.
3025 */
3026int dev_unicast_sync(struct net_device *to, struct net_device *from)
3027{
3028 int err = 0;
3029
3030 netif_tx_lock_bh(to);
3031 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3032 &from->uc_list, &from->uc_count);
3033 if (!err)
3034 __dev_set_rx_mode(to);
3035 netif_tx_unlock_bh(to);
3036 return err;
3037}
3038EXPORT_SYMBOL(dev_unicast_sync);
3039
3040/**
3041 * dev_unicast_unsync - Remove synchronized addresses from the destination
3042 * device
3043 * @to: destination device
3044 * @from: source device
3045 *
3046 * Remove all addresses that were added to the destination device by
3047 * dev_unicast_sync(). This function is intended to be called from the
3048 * dev->stop function of layered software devices.
3049 */
3050void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3051{
3052 netif_tx_lock_bh(from);
3053 netif_tx_lock_bh(to);
3054
3055 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3056 &from->uc_list, &from->uc_count);
3057 __dev_set_rx_mode(to);
3058
3059 netif_tx_unlock_bh(to);
3060 netif_tx_unlock_bh(from);
3061}
3062EXPORT_SYMBOL(dev_unicast_unsync);
3063
2948static void __dev_addr_discard(struct dev_addr_list **list) 3064static void __dev_addr_discard(struct dev_addr_list **list)
2949{ 3065{
2950 struct dev_addr_list *tmp; 3066 struct dev_addr_list *tmp;
@@ -3479,7 +3595,7 @@ static int dev_new_index(struct net *net)
3479 3595
3480/* Delayed registration/unregisteration */ 3596/* Delayed registration/unregisteration */
3481static DEFINE_SPINLOCK(net_todo_list_lock); 3597static DEFINE_SPINLOCK(net_todo_list_lock);
3482static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); 3598static LIST_HEAD(net_todo_list);
3483 3599
3484static void net_set_todo(struct net_device *dev) 3600static void net_set_todo(struct net_device *dev)
3485{ 3601{
@@ -3488,6 +3604,60 @@ static void net_set_todo(struct net_device *dev)
3488 spin_unlock(&net_todo_list_lock); 3604 spin_unlock(&net_todo_list_lock);
3489} 3605}
3490 3606
3607static void rollback_registered(struct net_device *dev)
3608{
3609 BUG_ON(dev_boot_phase);
3610 ASSERT_RTNL();
3611
3612 /* Some devices call without registering for initialization unwind. */
3613 if (dev->reg_state == NETREG_UNINITIALIZED) {
3614 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3615 "was registered\n", dev->name, dev);
3616
3617 WARN_ON(1);
3618 return;
3619 }
3620
3621 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3622
3623 /* If device is running, close it first. */
3624 dev_close(dev);
3625
3626 /* And unlink it from device chain. */
3627 unlist_netdevice(dev);
3628
3629 dev->reg_state = NETREG_UNREGISTERING;
3630
3631 synchronize_net();
3632
3633 /* Shutdown queueing discipline. */
3634 dev_shutdown(dev);
3635
3636
3637 /* Notify protocols, that we are about to destroy
3638 this device. They should clean all the things.
3639 */
3640 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3641
3642 /*
3643 * Flush the unicast and multicast chains
3644 */
3645 dev_addr_discard(dev);
3646
3647 if (dev->uninit)
3648 dev->uninit(dev);
3649
3650 /* Notifier chain MUST detach us from master device. */
3651 BUG_TRAP(!dev->master);
3652
3653 /* Remove entries from kobject tree */
3654 netdev_unregister_kobject(dev);
3655
3656 synchronize_net();
3657
3658 dev_put(dev);
3659}
3660
3491/** 3661/**
3492 * register_netdevice - register a network device 3662 * register_netdevice - register a network device
3493 * @dev: device to register 3663 * @dev: device to register
@@ -3625,8 +3795,10 @@ int register_netdevice(struct net_device *dev)
3625 /* Notify protocols, that a new device appeared. */ 3795 /* Notify protocols, that a new device appeared. */
3626 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 3796 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
3627 ret = notifier_to_errno(ret); 3797 ret = notifier_to_errno(ret);
3628 if (ret) 3798 if (ret) {
3629 unregister_netdevice(dev); 3799 rollback_registered(dev);
3800 dev->reg_state = NETREG_UNREGISTERED;
3801 }
3630 3802
3631out: 3803out:
3632 return ret; 3804 return ret;
@@ -3894,8 +4066,7 @@ void synchronize_net(void)
3894 * @dev: device 4066 * @dev: device
3895 * 4067 *
3896 * This function shuts down a device interface and removes it 4068 * This function shuts down a device interface and removes it
3897 * from the kernel tables. On success 0 is returned, on a failure 4069 * from the kernel tables.
3898 * a negative errno code is returned.
3899 * 4070 *
3900 * Callers must hold the rtnl semaphore. You may want 4071 * Callers must hold the rtnl semaphore. You may want
3901 * unregister_netdev() instead of this. 4072 * unregister_netdev() instead of this.
@@ -3903,59 +4074,11 @@ void synchronize_net(void)
3903 4074
3904void unregister_netdevice(struct net_device *dev) 4075void unregister_netdevice(struct net_device *dev)
3905{ 4076{
3906 BUG_ON(dev_boot_phase);
3907 ASSERT_RTNL(); 4077 ASSERT_RTNL();
3908 4078
3909 /* Some devices call without registering for initialization unwind. */ 4079 rollback_registered(dev);
3910 if (dev->reg_state == NETREG_UNINITIALIZED) {
3911 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3912 "was registered\n", dev->name, dev);
3913
3914 WARN_ON(1);
3915 return;
3916 }
3917
3918 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3919
3920 /* If device is running, close it first. */
3921 dev_close(dev);
3922
3923 /* And unlink it from device chain. */
3924 unlist_netdevice(dev);
3925
3926 dev->reg_state = NETREG_UNREGISTERING;
3927
3928 synchronize_net();
3929
3930 /* Shutdown queueing discipline. */
3931 dev_shutdown(dev);
3932
3933
3934 /* Notify protocols, that we are about to destroy
3935 this device. They should clean all the things.
3936 */
3937 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3938
3939 /*
3940 * Flush the unicast and multicast chains
3941 */
3942 dev_addr_discard(dev);
3943
3944 if (dev->uninit)
3945 dev->uninit(dev);
3946
3947 /* Notifier chain MUST detach us from master device. */
3948 BUG_TRAP(!dev->master);
3949
3950 /* Remove entries from kobject tree */
3951 netdev_unregister_kobject(dev);
3952
3953 /* Finish processing unregister after unlock */ 4080 /* Finish processing unregister after unlock */
3954 net_set_todo(dev); 4081 net_set_todo(dev);
3955
3956 synchronize_net();
3957
3958 dev_put(dev);
3959} 4082}
3960 4083
3961/** 4084/**
@@ -3963,8 +4086,7 @@ void unregister_netdevice(struct net_device *dev)
3963 * @dev: device 4086 * @dev: device
3964 * 4087 *
3965 * This function shuts down a device interface and removes it 4088 * This function shuts down a device interface and removes it
3966 * from the kernel tables. On success 0 is returned, on a failure 4089 * from the kernel tables.
3967 * a negative errno code is returned.
3968 * 4090 *
3969 * This is just a wrapper for unregister_netdevice that takes 4091 * This is just a wrapper for unregister_netdevice that takes
3970 * the rtnl semaphore. In general you want to use this and not 4092 * the rtnl semaphore. In general you want to use this and not
@@ -4304,7 +4426,6 @@ static struct hlist_head *netdev_create_hash(void)
4304static int __net_init netdev_init(struct net *net) 4426static int __net_init netdev_init(struct net *net)
4305{ 4427{
4306 INIT_LIST_HEAD(&net->dev_base_head); 4428 INIT_LIST_HEAD(&net->dev_base_head);
4307 rwlock_init(&dev_base_lock);
4308 4429
4309 net->dev_name_head = netdev_create_hash(); 4430 net->dev_name_head = netdev_create_hash();
4310 if (net->dev_name_head == NULL) 4431 if (net->dev_name_head == NULL)
@@ -4328,7 +4449,7 @@ static void __net_exit netdev_exit(struct net *net)
4328 kfree(net->dev_index_head); 4449 kfree(net->dev_index_head);
4329} 4450}
4330 4451
4331static struct pernet_operations netdev_net_ops = { 4452static struct pernet_operations __net_initdata netdev_net_ops = {
4332 .init = netdev_init, 4453 .init = netdev_init,
4333 .exit = netdev_exit, 4454 .exit = netdev_exit,
4334}; 4455};
@@ -4359,7 +4480,7 @@ static void __net_exit default_device_exit(struct net *net)
4359 rtnl_unlock(); 4480 rtnl_unlock();
4360} 4481}
4361 4482
4362static struct pernet_operations default_device_ops = { 4483static struct pernet_operations __net_initdata default_device_ops = {
4363 .exit = default_device_exit, 4484 .exit = default_device_exit,
4364}; 4485};
4365 4486
@@ -4387,7 +4508,7 @@ static int __init net_dev_init(void)
4387 goto out; 4508 goto out;
4388 4509
4389 INIT_LIST_HEAD(&ptype_all); 4510 INIT_LIST_HEAD(&ptype_all);
4390 for (i = 0; i < 16; i++) 4511 for (i = 0; i < PTYPE_HASH_SIZE; i++)
4391 INIT_LIST_HEAD(&ptype_base[i]); 4512 INIT_LIST_HEAD(&ptype_base[i]);
4392 4513
4393 if (register_pernet_subsys(&netdev_net_ops)) 4514 if (register_pernet_subsys(&netdev_net_ops))
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index ae354057d84c..cec582563e0d 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -113,32 +113,15 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
113 * locked by netif_tx_lock_bh. 113 * locked by netif_tx_lock_bh.
114 * 114 *
115 * This function is intended to be called from the dev->set_multicast_list 115 * This function is intended to be called from the dev->set_multicast_list
116 * function of layered software devices. 116 * or dev->set_rx_mode function of layered software devices.
117 */ 117 */
118int dev_mc_sync(struct net_device *to, struct net_device *from) 118int dev_mc_sync(struct net_device *to, struct net_device *from)
119{ 119{
120 struct dev_addr_list *da, *next;
121 int err = 0; 120 int err = 0;
122 121
123 netif_tx_lock_bh(to); 122 netif_tx_lock_bh(to);
124 da = from->mc_list; 123 err = __dev_addr_sync(&to->mc_list, &to->mc_count,
125 while (da != NULL) { 124 &from->mc_list, &from->mc_count);
126 next = da->next;
127 if (!da->da_synced) {
128 err = __dev_addr_add(&to->mc_list, &to->mc_count,
129 da->da_addr, da->da_addrlen, 0);
130 if (err < 0)
131 break;
132 da->da_synced = 1;
133 da->da_users++;
134 } else if (da->da_users == 1) {
135 __dev_addr_delete(&to->mc_list, &to->mc_count,
136 da->da_addr, da->da_addrlen, 0);
137 __dev_addr_delete(&from->mc_list, &from->mc_count,
138 da->da_addr, da->da_addrlen, 0);
139 }
140 da = next;
141 }
142 if (!err) 125 if (!err)
143 __dev_set_rx_mode(to); 126 __dev_set_rx_mode(to);
144 netif_tx_unlock_bh(to); 127 netif_tx_unlock_bh(to);
@@ -160,23 +143,11 @@ EXPORT_SYMBOL(dev_mc_sync);
160 */ 143 */
161void dev_mc_unsync(struct net_device *to, struct net_device *from) 144void dev_mc_unsync(struct net_device *to, struct net_device *from)
162{ 145{
163 struct dev_addr_list *da, *next;
164
165 netif_tx_lock_bh(from); 146 netif_tx_lock_bh(from);
166 netif_tx_lock_bh(to); 147 netif_tx_lock_bh(to);
167 148
168 da = from->mc_list; 149 __dev_addr_unsync(&to->mc_list, &to->mc_count,
169 while (da != NULL) { 150 &from->mc_list, &from->mc_count);
170 next = da->next;
171 if (!da->da_synced)
172 continue;
173 __dev_addr_delete(&to->mc_list, &to->mc_count,
174 da->da_addr, da->da_addrlen, 0);
175 da->da_synced = 0;
176 __dev_addr_delete(&from->mc_list, &from->mc_count,
177 da->da_addr, da->da_addrlen, 0);
178 da = next;
179 }
180 __dev_set_rx_mode(to); 151 __dev_set_rx_mode(to);
181 152
182 netif_tx_unlock_bh(to); 153 netif_tx_unlock_bh(to);
@@ -186,8 +157,9 @@ EXPORT_SYMBOL(dev_mc_unsync);
186 157
187#ifdef CONFIG_PROC_FS 158#ifdef CONFIG_PROC_FS
188static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) 159static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
160 __acquires(dev_base_lock)
189{ 161{
190 struct net *net = seq->private; 162 struct net *net = seq_file_net(seq);
191 struct net_device *dev; 163 struct net_device *dev;
192 loff_t off = 0; 164 loff_t off = 0;
193 165
@@ -206,6 +178,7 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
206} 178}
207 179
208static void dev_mc_seq_stop(struct seq_file *seq, void *v) 180static void dev_mc_seq_stop(struct seq_file *seq, void *v)
181 __releases(dev_base_lock)
209{ 182{
210 read_unlock(&dev_base_lock); 183 read_unlock(&dev_base_lock);
211} 184}
@@ -241,26 +214,8 @@ static const struct seq_operations dev_mc_seq_ops = {
241 214
242static int dev_mc_seq_open(struct inode *inode, struct file *file) 215static int dev_mc_seq_open(struct inode *inode, struct file *file)
243{ 216{
244 struct seq_file *seq; 217 return seq_open_net(inode, file, &dev_mc_seq_ops,
245 int res; 218 sizeof(struct seq_net_private));
246 res = seq_open(file, &dev_mc_seq_ops);
247 if (!res) {
248 seq = file->private_data;
249 seq->private = get_proc_net(inode);
250 if (!seq->private) {
251 seq_release(inode, file);
252 res = -ENXIO;
253 }
254 }
255 return res;
256}
257
258static int dev_mc_seq_release(struct inode *inode, struct file *file)
259{
260 struct seq_file *seq = file->private_data;
261 struct net *net = seq->private;
262 put_net(net);
263 return seq_release(inode, file);
264} 219}
265 220
266static const struct file_operations dev_mc_seq_fops = { 221static const struct file_operations dev_mc_seq_fops = {
@@ -268,7 +223,7 @@ static const struct file_operations dev_mc_seq_fops = {
268 .open = dev_mc_seq_open, 223 .open = dev_mc_seq_open,
269 .read = seq_read, 224 .read = seq_read,
270 .llseek = seq_lseek, 225 .llseek = seq_lseek,
271 .release = dev_mc_seq_release, 226 .release = seq_release_net,
272}; 227};
273 228
274#endif 229#endif
@@ -285,7 +240,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
285 proc_net_remove(net, "dev_mcast"); 240 proc_net_remove(net, "dev_mcast");
286} 241}
287 242
288static struct pernet_operations dev_mc_net_ops = { 243static struct pernet_operations __net_initdata dev_mc_net_ops = {
289 .init = dev_mc_net_init, 244 .init = dev_mc_net_init,
290 .exit = dev_mc_net_exit, 245 .exit = dev_mc_net_exit,
291}; 246};
diff --git a/net/core/dst.c b/net/core/dst.c
index 16958e64e577..7deef483c79f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -18,7 +18,6 @@
18#include <linux/types.h> 18#include <linux/types.h>
19#include <net/net_namespace.h> 19#include <net/net_namespace.h>
20 20
21#include <net/net_namespace.h>
22#include <net/dst.h> 21#include <net/dst.h>
23 22
24/* 23/*
@@ -154,18 +153,19 @@ loop:
154#endif 153#endif
155} 154}
156 155
157static int dst_discard(struct sk_buff *skb) 156int dst_discard(struct sk_buff *skb)
158{ 157{
159 kfree_skb(skb); 158 kfree_skb(skb);
160 return 0; 159 return 0;
161} 160}
161EXPORT_SYMBOL(dst_discard);
162 162
163void * dst_alloc(struct dst_ops * ops) 163void * dst_alloc(struct dst_ops * ops)
164{ 164{
165 struct dst_entry * dst; 165 struct dst_entry * dst;
166 166
167 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { 167 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
168 if (ops->gc()) 168 if (ops->gc(ops))
169 return NULL; 169 return NULL;
170 } 170 }
171 dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); 171 dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
@@ -279,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
279 if (!unregister) { 279 if (!unregister) {
280 dst->input = dst->output = dst_discard; 280 dst->input = dst->output = dst_discard;
281 } else { 281 } else {
282 dst->dev = init_net.loopback_dev; 282 dst->dev = dst->dev->nd_net->loopback_dev;
283 dev_hold(dst->dev); 283 dev_hold(dst->dev);
284 dev_put(dev); 284 dev_put(dev);
285 if (dst->neighbour && dst->neighbour->dev == dev) { 285 if (dst->neighbour && dst->neighbour->dev == dev) {
286 dst->neighbour->dev = init_net.loopback_dev; 286 dst->neighbour->dev = dst->dev;
287 dev_hold(dst->dev);
287 dev_put(dev); 288 dev_put(dev);
288 dev_hold(dst->neighbour->dev);
289 } 289 }
290 } 290 }
291} 291}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 13de6f53f098..42ccaf5b8509 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -15,19 +15,39 @@
15#include <net/sock.h> 15#include <net/sock.h>
16#include <net/fib_rules.h> 16#include <net/fib_rules.h>
17 17
18static LIST_HEAD(rules_ops); 18int fib_default_rule_add(struct fib_rules_ops *ops,
19static DEFINE_SPINLOCK(rules_mod_lock); 19 u32 pref, u32 table, u32 flags)
20{
21 struct fib_rule *r;
22
23 r = kzalloc(ops->rule_size, GFP_KERNEL);
24 if (r == NULL)
25 return -ENOMEM;
26
27 atomic_set(&r->refcnt, 1);
28 r->action = FR_ACT_TO_TBL;
29 r->pref = pref;
30 r->table = table;
31 r->flags = flags;
32 r->fr_net = ops->fro_net;
33
34 /* The lock is not required here, the list in unreacheable
35 * at the moment this function is called */
36 list_add_tail(&r->list, &ops->rules_list);
37 return 0;
38}
39EXPORT_SYMBOL(fib_default_rule_add);
20 40
21static void notify_rule_change(int event, struct fib_rule *rule, 41static void notify_rule_change(int event, struct fib_rule *rule,
22 struct fib_rules_ops *ops, struct nlmsghdr *nlh, 42 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
23 u32 pid); 43 u32 pid);
24 44
25static struct fib_rules_ops *lookup_rules_ops(int family) 45static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
26{ 46{
27 struct fib_rules_ops *ops; 47 struct fib_rules_ops *ops;
28 48
29 rcu_read_lock(); 49 rcu_read_lock();
30 list_for_each_entry_rcu(ops, &rules_ops, list) { 50 list_for_each_entry_rcu(ops, &net->rules_ops, list) {
31 if (ops->family == family) { 51 if (ops->family == family) {
32 if (!try_module_get(ops->owner)) 52 if (!try_module_get(ops->owner))
33 ops = NULL; 53 ops = NULL;
@@ -56,6 +76,9 @@ int fib_rules_register(struct fib_rules_ops *ops)
56{ 76{
57 int err = -EEXIST; 77 int err = -EEXIST;
58 struct fib_rules_ops *o; 78 struct fib_rules_ops *o;
79 struct net *net;
80
81 net = ops->fro_net;
59 82
60 if (ops->rule_size < sizeof(struct fib_rule)) 83 if (ops->rule_size < sizeof(struct fib_rule))
61 return -EINVAL; 84 return -EINVAL;
@@ -65,22 +88,23 @@ int fib_rules_register(struct fib_rules_ops *ops)
65 ops->action == NULL) 88 ops->action == NULL)
66 return -EINVAL; 89 return -EINVAL;
67 90
68 spin_lock(&rules_mod_lock); 91 spin_lock(&net->rules_mod_lock);
69 list_for_each_entry(o, &rules_ops, list) 92 list_for_each_entry(o, &net->rules_ops, list)
70 if (ops->family == o->family) 93 if (ops->family == o->family)
71 goto errout; 94 goto errout;
72 95
73 list_add_tail_rcu(&ops->list, &rules_ops); 96 hold_net(net);
97 list_add_tail_rcu(&ops->list, &net->rules_ops);
74 err = 0; 98 err = 0;
75errout: 99errout:
76 spin_unlock(&rules_mod_lock); 100 spin_unlock(&net->rules_mod_lock);
77 101
78 return err; 102 return err;
79} 103}
80 104
81EXPORT_SYMBOL_GPL(fib_rules_register); 105EXPORT_SYMBOL_GPL(fib_rules_register);
82 106
83static void cleanup_ops(struct fib_rules_ops *ops) 107void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
84{ 108{
85 struct fib_rule *rule, *tmp; 109 struct fib_rule *rule, *tmp;
86 110
@@ -89,28 +113,19 @@ static void cleanup_ops(struct fib_rules_ops *ops)
89 fib_rule_put(rule); 113 fib_rule_put(rule);
90 } 114 }
91} 115}
116EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
92 117
93int fib_rules_unregister(struct fib_rules_ops *ops) 118void fib_rules_unregister(struct fib_rules_ops *ops)
94{ 119{
95 int err = 0; 120 struct net *net = ops->fro_net;
96 struct fib_rules_ops *o;
97 121
98 spin_lock(&rules_mod_lock); 122 spin_lock(&net->rules_mod_lock);
99 list_for_each_entry(o, &rules_ops, list) { 123 list_del_rcu(&ops->list);
100 if (o == ops) { 124 fib_rules_cleanup_ops(ops);
101 list_del_rcu(&o->list); 125 spin_unlock(&net->rules_mod_lock);
102 cleanup_ops(ops);
103 goto out;
104 }
105 }
106
107 err = -ENOENT;
108out:
109 spin_unlock(&rules_mod_lock);
110 126
111 synchronize_rcu(); 127 synchronize_rcu();
112 128 release_net(net);
113 return err;
114} 129}
115 130
116EXPORT_SYMBOL_GPL(fib_rules_unregister); 131EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -209,7 +224,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
209 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) 224 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
210 goto errout; 225 goto errout;
211 226
212 ops = lookup_rules_ops(frh->family); 227 ops = lookup_rules_ops(net, frh->family);
213 if (ops == NULL) { 228 if (ops == NULL) {
214 err = EAFNOSUPPORT; 229 err = EAFNOSUPPORT;
215 goto errout; 230 goto errout;
@@ -228,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
228 err = -ENOMEM; 243 err = -ENOMEM;
229 goto errout; 244 goto errout;
230 } 245 }
246 rule->fr_net = net;
231 247
232 if (tb[FRA_PRIORITY]) 248 if (tb[FRA_PRIORITY])
233 rule->pref = nla_get_u32(tb[FRA_PRIORITY]); 249 rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -259,7 +275,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
259 rule->table = frh_get_table(frh, tb); 275 rule->table = frh_get_table(frh, tb);
260 276
261 if (!rule->pref && ops->default_pref) 277 if (!rule->pref && ops->default_pref)
262 rule->pref = ops->default_pref(); 278 rule->pref = ops->default_pref(ops);
263 279
264 err = -EINVAL; 280 err = -EINVAL;
265 if (tb[FRA_GOTO]) { 281 if (tb[FRA_GOTO]) {
@@ -336,6 +352,7 @@ errout:
336 352
337static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 353static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
338{ 354{
355 struct net *net = skb->sk->sk_net;
339 struct fib_rule_hdr *frh = nlmsg_data(nlh); 356 struct fib_rule_hdr *frh = nlmsg_data(nlh);
340 struct fib_rules_ops *ops = NULL; 357 struct fib_rules_ops *ops = NULL;
341 struct fib_rule *rule, *tmp; 358 struct fib_rule *rule, *tmp;
@@ -345,7 +362,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
345 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) 362 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
346 goto errout; 363 goto errout;
347 364
348 ops = lookup_rules_ops(frh->family); 365 ops = lookup_rules_ops(net, frh->family);
349 if (ops == NULL) { 366 if (ops == NULL) {
350 err = EAFNOSUPPORT; 367 err = EAFNOSUPPORT;
351 goto errout; 368 goto errout;
@@ -517,13 +534,14 @@ skip:
517 534
518static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) 535static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
519{ 536{
537 struct net *net = skb->sk->sk_net;
520 struct fib_rules_ops *ops; 538 struct fib_rules_ops *ops;
521 int idx = 0, family; 539 int idx = 0, family;
522 540
523 family = rtnl_msg_family(cb->nlh); 541 family = rtnl_msg_family(cb->nlh);
524 if (family != AF_UNSPEC) { 542 if (family != AF_UNSPEC) {
525 /* Protocol specific dump request */ 543 /* Protocol specific dump request */
526 ops = lookup_rules_ops(family); 544 ops = lookup_rules_ops(net, family);
527 if (ops == NULL) 545 if (ops == NULL)
528 return -EAFNOSUPPORT; 546 return -EAFNOSUPPORT;
529 547
@@ -531,7 +549,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
531 } 549 }
532 550
533 rcu_read_lock(); 551 rcu_read_lock();
534 list_for_each_entry_rcu(ops, &rules_ops, list) { 552 list_for_each_entry_rcu(ops, &net->rules_ops, list) {
535 if (idx < cb->args[0] || !try_module_get(ops->owner)) 553 if (idx < cb->args[0] || !try_module_get(ops->owner))
536 goto skip; 554 goto skip;
537 555
@@ -552,9 +570,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
552 struct fib_rules_ops *ops, struct nlmsghdr *nlh, 570 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
553 u32 pid) 571 u32 pid)
554{ 572{
573 struct net *net;
555 struct sk_buff *skb; 574 struct sk_buff *skb;
556 int err = -ENOBUFS; 575 int err = -ENOBUFS;
557 576
577 net = ops->fro_net;
558 skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); 578 skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
559 if (skb == NULL) 579 if (skb == NULL)
560 goto errout; 580 goto errout;
@@ -566,10 +586,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
566 kfree_skb(skb); 586 kfree_skb(skb);
567 goto errout; 587 goto errout;
568 } 588 }
569 err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); 589
590 err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
570errout: 591errout:
571 if (err < 0) 592 if (err < 0)
572 rtnl_set_sk_err(ops->nlgroup, err); 593 rtnl_set_sk_err(net, ops->nlgroup, err);
573} 594}
574 595
575static void attach_rules(struct list_head *rules, struct net_device *dev) 596static void attach_rules(struct list_head *rules, struct net_device *dev)
@@ -597,22 +618,20 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
597 void *ptr) 618 void *ptr)
598{ 619{
599 struct net_device *dev = ptr; 620 struct net_device *dev = ptr;
621 struct net *net = dev->nd_net;
600 struct fib_rules_ops *ops; 622 struct fib_rules_ops *ops;
601 623
602 if (dev->nd_net != &init_net)
603 return NOTIFY_DONE;
604
605 ASSERT_RTNL(); 624 ASSERT_RTNL();
606 rcu_read_lock(); 625 rcu_read_lock();
607 626
608 switch (event) { 627 switch (event) {
609 case NETDEV_REGISTER: 628 case NETDEV_REGISTER:
610 list_for_each_entry(ops, &rules_ops, list) 629 list_for_each_entry(ops, &net->rules_ops, list)
611 attach_rules(&ops->rules_list, dev); 630 attach_rules(&ops->rules_list, dev);
612 break; 631 break;
613 632
614 case NETDEV_UNREGISTER: 633 case NETDEV_UNREGISTER:
615 list_for_each_entry(ops, &rules_ops, list) 634 list_for_each_entry(ops, &net->rules_ops, list)
616 detach_rules(&ops->rules_list, dev); 635 detach_rules(&ops->rules_list, dev);
617 break; 636 break;
618 } 637 }
@@ -626,13 +645,40 @@ static struct notifier_block fib_rules_notifier = {
626 .notifier_call = fib_rules_event, 645 .notifier_call = fib_rules_event,
627}; 646};
628 647
648static int fib_rules_net_init(struct net *net)
649{
650 INIT_LIST_HEAD(&net->rules_ops);
651 spin_lock_init(&net->rules_mod_lock);
652 return 0;
653}
654
655static struct pernet_operations fib_rules_net_ops = {
656 .init = fib_rules_net_init,
657};
658
629static int __init fib_rules_init(void) 659static int __init fib_rules_init(void)
630{ 660{
661 int err;
631 rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); 662 rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
632 rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); 663 rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
633 rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); 664 rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
634 665
635 return register_netdevice_notifier(&fib_rules_notifier); 666 err = register_netdevice_notifier(&fib_rules_notifier);
667 if (err < 0)
668 goto fail;
669
670 err = register_pernet_subsys(&fib_rules_net_ops);
671 if (err < 0)
672 goto fail_unregister;
673 return 0;
674
675fail_unregister:
676 unregister_netdevice_notifier(&fib_rules_notifier);
677fail:
678 rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
679 rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
680 rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
681 return err;
636} 682}
637 683
638subsys_initcall(fib_rules_init); 684subsys_initcall(fib_rules_init);
diff --git a/net/core/flow.c b/net/core/flow.c
index 3ed2b4b1d6d4..46b38e06e0d7 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -293,7 +293,7 @@ void flow_cache_flush(void)
293 static DEFINE_MUTEX(flow_flush_sem); 293 static DEFINE_MUTEX(flow_flush_sem);
294 294
295 /* Don't want cpus going down or up during this. */ 295 /* Don't want cpus going down or up during this. */
296 lock_cpu_hotplug(); 296 get_online_cpus();
297 mutex_lock(&flow_flush_sem); 297 mutex_lock(&flow_flush_sem);
298 atomic_set(&info.cpuleft, num_online_cpus()); 298 atomic_set(&info.cpuleft, num_online_cpus());
299 init_completion(&info.completion); 299 init_completion(&info.completion);
@@ -305,7 +305,7 @@ void flow_cache_flush(void)
305 305
306 wait_for_completion(&info.completion); 306 wait_for_completion(&info.completion);
307 mutex_unlock(&flow_flush_sem); 307 mutex_unlock(&flow_flush_sem);
308 unlock_cpu_hotplug(); 308 put_online_cpus();
309} 309}
310 310
311static void __devinit flow_cache_cpu_prepare(int cpu) 311static void __devinit flow_cache_cpu_prepare(int cpu)
@@ -352,8 +352,7 @@ static int __init flow_cache_init(void)
352 flow_lwm = 2 * flow_hash_size; 352 flow_lwm = 2 * flow_hash_size;
353 flow_hwm = 4 * flow_hash_size; 353 flow_hwm = 4 * flow_hash_size;
354 354
355 init_timer(&flow_hash_rnd_timer); 355 setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
356 flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
357 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 356 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
358 add_timer(&flow_hash_rnd_timer); 357 add_timer(&flow_hash_rnd_timer);
359 358
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index daadbcc4e8dd..57abe8266be1 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -135,7 +135,7 @@ skip:
135 } 135 }
136 136
137 if (!list_empty(&elist[idx].list)) 137 if (!list_empty(&elist[idx].list))
138 mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); 138 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
139 rcu_read_unlock(); 139 rcu_read_unlock();
140} 140}
141 141
@@ -159,13 +159,13 @@ skip:
159int gen_new_estimator(struct gnet_stats_basic *bstats, 159int gen_new_estimator(struct gnet_stats_basic *bstats,
160 struct gnet_stats_rate_est *rate_est, 160 struct gnet_stats_rate_est *rate_est,
161 spinlock_t *stats_lock, 161 spinlock_t *stats_lock,
162 struct rtattr *opt) 162 struct nlattr *opt)
163{ 163{
164 struct gen_estimator *est; 164 struct gen_estimator *est;
165 struct gnet_estimator *parm = RTA_DATA(opt); 165 struct gnet_estimator *parm = nla_data(opt);
166 int idx; 166 int idx;
167 167
168 if (RTA_PAYLOAD(opt) < sizeof(*parm)) 168 if (nla_len(opt) < sizeof(*parm))
169 return -EINVAL; 169 return -EINVAL;
170 170
171 if (parm->interval < -2 || parm->interval > 3) 171 if (parm->interval < -2 || parm->interval > 3)
@@ -191,7 +191,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
191 } 191 }
192 192
193 if (list_empty(&elist[idx].list)) 193 if (list_empty(&elist[idx].list))
194 mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); 194 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
195 195
196 list_add_rcu(&est->list, &elist[idx].list); 196 list_add_rcu(&est->list, &elist[idx].list);
197 return 0; 197 return 0;
@@ -241,7 +241,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
241} 241}
242 242
243/** 243/**
244 * gen_replace_estimator - replace rate estimator configruation 244 * gen_replace_estimator - replace rate estimator configuration
245 * @bstats: basic statistics 245 * @bstats: basic statistics
246 * @rate_est: rate estimator statistics 246 * @rate_est: rate estimator statistics
247 * @stats_lock: statistics lock 247 * @stats_lock: statistics lock
@@ -252,13 +252,12 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
252 * 252 *
253 * Returns 0 on success or a negative error code. 253 * Returns 0 on success or a negative error code.
254 */ 254 */
255int 255int gen_replace_estimator(struct gnet_stats_basic *bstats,
256gen_replace_estimator(struct gnet_stats_basic *bstats, 256 struct gnet_stats_rate_est *rate_est,
257 struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, 257 spinlock_t *stats_lock, struct nlattr *opt)
258 struct rtattr *opt)
259{ 258{
260 gen_kill_estimator(bstats, rate_est); 259 gen_kill_estimator(bstats, rate_est);
261 return gen_new_estimator(bstats, rate_est, stats_lock, opt); 260 return gen_new_estimator(bstats, rate_est, stats_lock, opt);
262} 261}
263 262
264 263
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index bcc25591d8ac..c3d0ffeac243 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -20,16 +20,17 @@
20#include <linux/socket.h> 20#include <linux/socket.h>
21#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
22#include <linux/gen_stats.h> 22#include <linux/gen_stats.h>
23#include <net/netlink.h>
23#include <net/gen_stats.h> 24#include <net/gen_stats.h>
24 25
25 26
26static inline int 27static inline int
27gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) 28gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
28{ 29{
29 RTA_PUT(d->skb, type, size, buf); 30 NLA_PUT(d->skb, type, size, buf);
30 return 0; 31 return 0;
31 32
32rtattr_failure: 33nla_put_failure:
33 spin_unlock_bh(d->lock); 34 spin_unlock_bh(d->lock);
34 return -1; 35 return -1;
35} 36}
@@ -55,13 +56,14 @@ rtattr_failure:
55int 56int
56gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, 57gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
57 int xstats_type, spinlock_t *lock, struct gnet_dump *d) 58 int xstats_type, spinlock_t *lock, struct gnet_dump *d)
59 __acquires(lock)
58{ 60{
59 memset(d, 0, sizeof(*d)); 61 memset(d, 0, sizeof(*d));
60 62
61 spin_lock_bh(lock); 63 spin_lock_bh(lock);
62 d->lock = lock; 64 d->lock = lock;
63 if (type) 65 if (type)
64 d->tail = (struct rtattr *)skb_tail_pointer(skb); 66 d->tail = (struct nlattr *)skb_tail_pointer(skb);
65 d->skb = skb; 67 d->skb = skb;
66 d->compat_tc_stats = tc_stats_type; 68 d->compat_tc_stats = tc_stats_type;
67 d->compat_xstats = xstats_type; 69 d->compat_xstats = xstats_type;
@@ -212,7 +214,7 @@ int
212gnet_stats_finish_copy(struct gnet_dump *d) 214gnet_stats_finish_copy(struct gnet_dump *d)
213{ 215{
214 if (d->tail) 216 if (d->tail)
215 d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail; 217 d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
216 218
217 if (d->compat_tc_stats) 219 if (d->compat_tc_stats)
218 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, 220 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 05979e356963..a16cf1ec5e5e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,6 @@ static void neigh_timer_handler(unsigned long arg);
59static void __neigh_notify(struct neighbour *n, int type, int flags); 59static void __neigh_notify(struct neighbour *n, int type, int flags);
60static void neigh_update_notify(struct neighbour *neigh); 60static void neigh_update_notify(struct neighbour *neigh);
61static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); 61static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
63 62
64static struct neigh_table *neigh_tables; 63static struct neigh_table *neigh_tables;
65#ifdef CONFIG_PROC_FS 64#ifdef CONFIG_PROC_FS
@@ -165,6 +164,16 @@ static int neigh_forced_gc(struct neigh_table *tbl)
165 return shrunk; 164 return shrunk;
166} 165}
167 166
167static void neigh_add_timer(struct neighbour *n, unsigned long when)
168{
169 neigh_hold(n);
170 if (unlikely(mod_timer(&n->timer, when))) {
171 printk("NEIGH: BUG, double timer add, state is %x\n",
172 n->nud_state);
173 dump_stack();
174 }
175}
176
168static int neigh_del_timer(struct neighbour *n) 177static int neigh_del_timer(struct neighbour *n)
169{ 178{
170 if ((n->nud_state & NUD_IN_TIMER) && 179 if ((n->nud_state & NUD_IN_TIMER) &&
@@ -270,9 +279,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
270 n->nud_state = NUD_NONE; 279 n->nud_state = NUD_NONE;
271 n->output = neigh_blackhole; 280 n->output = neigh_blackhole;
272 n->parms = neigh_parms_clone(&tbl->parms); 281 n->parms = neigh_parms_clone(&tbl->parms);
273 init_timer(&n->timer); 282 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
274 n->timer.function = neigh_timer_handler;
275 n->timer.data = (unsigned long)n;
276 283
277 NEIGH_CACHE_STAT_INC(tbl, allocs); 284 NEIGH_CACHE_STAT_INC(tbl, allocs);
278 n->tbl = tbl; 285 n->tbl = tbl;
@@ -367,7 +374,8 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
367 return n; 374 return n;
368} 375}
369 376
370struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) 377struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
378 const void *pkey)
371{ 379{
372 struct neighbour *n; 380 struct neighbour *n;
373 int key_len = tbl->key_len; 381 int key_len = tbl->key_len;
@@ -377,7 +385,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
377 385
378 read_lock_bh(&tbl->lock); 386 read_lock_bh(&tbl->lock);
379 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 387 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
380 if (!memcmp(n->primary_key, pkey, key_len)) { 388 if (!memcmp(n->primary_key, pkey, key_len) &&
389 (net == n->dev->nd_net)) {
381 neigh_hold(n); 390 neigh_hold(n);
382 NEIGH_CACHE_STAT_INC(tbl, hits); 391 NEIGH_CACHE_STAT_INC(tbl, hits);
383 break; 392 break;
@@ -455,7 +464,8 @@ out_neigh_release:
455 goto out; 464 goto out;
456} 465}
457 466
458struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, 467struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
468 struct net *net, const void *pkey,
459 struct net_device *dev, int creat) 469 struct net_device *dev, int creat)
460{ 470{
461 struct pneigh_entry *n; 471 struct pneigh_entry *n;
@@ -471,6 +481,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
471 481
472 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { 482 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
473 if (!memcmp(n->key, pkey, key_len) && 483 if (!memcmp(n->key, pkey, key_len) &&
484 (n->net == net) &&
474 (n->dev == dev || !n->dev)) { 485 (n->dev == dev || !n->dev)) {
475 read_unlock_bh(&tbl->lock); 486 read_unlock_bh(&tbl->lock);
476 goto out; 487 goto out;
@@ -487,6 +498,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
487 if (!n) 498 if (!n)
488 goto out; 499 goto out;
489 500
501 n->net = hold_net(net);
490 memcpy(n->key, pkey, key_len); 502 memcpy(n->key, pkey, key_len);
491 n->dev = dev; 503 n->dev = dev;
492 if (dev) 504 if (dev)
@@ -509,7 +521,7 @@ out:
509} 521}
510 522
511 523
512int pneigh_delete(struct neigh_table *tbl, const void *pkey, 524int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
513 struct net_device *dev) 525 struct net_device *dev)
514{ 526{
515 struct pneigh_entry *n, **np; 527 struct pneigh_entry *n, **np;
@@ -524,13 +536,15 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey,
524 write_lock_bh(&tbl->lock); 536 write_lock_bh(&tbl->lock);
525 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; 537 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
526 np = &n->next) { 538 np = &n->next) {
527 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { 539 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
540 (n->net == net)) {
528 *np = n->next; 541 *np = n->next;
529 write_unlock_bh(&tbl->lock); 542 write_unlock_bh(&tbl->lock);
530 if (tbl->pdestructor) 543 if (tbl->pdestructor)
531 tbl->pdestructor(n); 544 tbl->pdestructor(n);
532 if (n->dev) 545 if (n->dev)
533 dev_put(n->dev); 546 dev_put(n->dev);
547 release_net(n->net);
534 kfree(n); 548 kfree(n);
535 return 0; 549 return 0;
536 } 550 }
@@ -553,6 +567,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
553 tbl->pdestructor(n); 567 tbl->pdestructor(n);
554 if (n->dev) 568 if (n->dev)
555 dev_put(n->dev); 569 dev_put(n->dev);
570 release_net(n->net);
556 kfree(n); 571 kfree(n);
557 continue; 572 continue;
558 } 573 }
@@ -562,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
562 return -ENOENT; 577 return -ENOENT;
563} 578}
564 579
580static void neigh_parms_destroy(struct neigh_parms *parms);
581
582static inline void neigh_parms_put(struct neigh_parms *parms)
583{
584 if (atomic_dec_and_test(&parms->refcnt))
585 neigh_parms_destroy(parms);
586}
565 587
566/* 588/*
567 * neighbour must already be out of the table; 589 * neighbour must already be out of the table;
@@ -718,15 +740,6 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
718 p->ucast_probes + p->app_probes + p->mcast_probes); 740 p->ucast_probes + p->app_probes + p->mcast_probes);
719} 741}
720 742
721static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
722{
723 if (unlikely(mod_timer(&n->timer, when))) {
724 printk("NEIGH: BUG, double timer add, state is %x\n",
725 n->nud_state);
726 dump_stack();
727 }
728}
729
730/* Called when a timer expires for a neighbour entry. */ 743/* Called when a timer expires for a neighbour entry. */
731 744
732static void neigh_timer_handler(unsigned long arg) 745static void neigh_timer_handler(unsigned long arg)
@@ -858,7 +871,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
858 atomic_set(&neigh->probes, neigh->parms->ucast_probes); 871 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
859 neigh->nud_state = NUD_INCOMPLETE; 872 neigh->nud_state = NUD_INCOMPLETE;
860 neigh->updated = jiffies; 873 neigh->updated = jiffies;
861 neigh_hold(neigh);
862 neigh_add_timer(neigh, now + 1); 874 neigh_add_timer(neigh, now + 1);
863 } else { 875 } else {
864 neigh->nud_state = NUD_FAILED; 876 neigh->nud_state = NUD_FAILED;
@@ -871,7 +883,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
871 } 883 }
872 } else if (neigh->nud_state & NUD_STALE) { 884 } else if (neigh->nud_state & NUD_STALE) {
873 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 885 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
874 neigh_hold(neigh);
875 neigh->nud_state = NUD_DELAY; 886 neigh->nud_state = NUD_DELAY;
876 neigh->updated = jiffies; 887 neigh->updated = jiffies;
877 neigh_add_timer(neigh, 888 neigh_add_timer(neigh,
@@ -1015,13 +1026,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1015 1026
1016 if (new != old) { 1027 if (new != old) {
1017 neigh_del_timer(neigh); 1028 neigh_del_timer(neigh);
1018 if (new & NUD_IN_TIMER) { 1029 if (new & NUD_IN_TIMER)
1019 neigh_hold(neigh);
1020 neigh_add_timer(neigh, (jiffies + 1030 neigh_add_timer(neigh, (jiffies +
1021 ((new & NUD_REACHABLE) ? 1031 ((new & NUD_REACHABLE) ?
1022 neigh->parms->reachable_time : 1032 neigh->parms->reachable_time :
1023 0))); 1033 0)));
1024 }
1025 neigh->nud_state = new; 1034 neigh->nud_state = new;
1026 } 1035 }
1027 1036
@@ -1266,27 +1275,49 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1266 spin_unlock(&tbl->proxy_queue.lock); 1275 spin_unlock(&tbl->proxy_queue.lock);
1267} 1276}
1268 1277
1278static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1279 struct net *net, int ifindex)
1280{
1281 struct neigh_parms *p;
1282
1283 for (p = &tbl->parms; p; p = p->next) {
1284 if (p->net != net)
1285 continue;
1286 if ((p->dev && p->dev->ifindex == ifindex) ||
1287 (!p->dev && !ifindex))
1288 return p;
1289 }
1290
1291 return NULL;
1292}
1269 1293
1270struct neigh_parms *neigh_parms_alloc(struct net_device *dev, 1294struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1271 struct neigh_table *tbl) 1295 struct neigh_table *tbl)
1272{ 1296{
1273 struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); 1297 struct neigh_parms *p, *ref;
1298 struct net *net;
1299
1300 net = dev->nd_net;
1301 ref = lookup_neigh_params(tbl, net, 0);
1302 if (!ref)
1303 return NULL;
1274 1304
1305 p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1275 if (p) { 1306 if (p) {
1276 p->tbl = tbl; 1307 p->tbl = tbl;
1277 atomic_set(&p->refcnt, 1); 1308 atomic_set(&p->refcnt, 1);
1278 INIT_RCU_HEAD(&p->rcu_head); 1309 INIT_RCU_HEAD(&p->rcu_head);
1279 p->reachable_time = 1310 p->reachable_time =
1280 neigh_rand_reach_time(p->base_reachable_time); 1311 neigh_rand_reach_time(p->base_reachable_time);
1281 if (dev) {
1282 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1283 kfree(p);
1284 return NULL;
1285 }
1286 1312
1287 dev_hold(dev); 1313 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1288 p->dev = dev; 1314 kfree(p);
1315 return NULL;
1289 } 1316 }
1317
1318 dev_hold(dev);
1319 p->dev = dev;
1320 p->net = hold_net(net);
1290 p->sysctl_table = NULL; 1321 p->sysctl_table = NULL;
1291 write_lock_bh(&tbl->lock); 1322 write_lock_bh(&tbl->lock);
1292 p->next = tbl->parms.next; 1323 p->next = tbl->parms.next;
@@ -1326,8 +1357,9 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1326 NEIGH_PRINTK1("neigh_parms_release: not found\n"); 1357 NEIGH_PRINTK1("neigh_parms_release: not found\n");
1327} 1358}
1328 1359
1329void neigh_parms_destroy(struct neigh_parms *parms) 1360static void neigh_parms_destroy(struct neigh_parms *parms)
1330{ 1361{
1362 release_net(parms->net);
1331 kfree(parms); 1363 kfree(parms);
1332} 1364}
1333 1365
@@ -1338,6 +1370,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1338 unsigned long now = jiffies; 1370 unsigned long now = jiffies;
1339 unsigned long phsize; 1371 unsigned long phsize;
1340 1372
1373 tbl->parms.net = &init_net;
1341 atomic_set(&tbl->parms.refcnt, 1); 1374 atomic_set(&tbl->parms.refcnt, 1);
1342 INIT_RCU_HEAD(&tbl->parms.rcu_head); 1375 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1343 tbl->parms.reachable_time = 1376 tbl->parms.reachable_time =
@@ -1372,15 +1405,11 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1372 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); 1405 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1373 1406
1374 rwlock_init(&tbl->lock); 1407 rwlock_init(&tbl->lock);
1375 init_timer(&tbl->gc_timer); 1408 setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
1376 tbl->gc_timer.data = (unsigned long)tbl;
1377 tbl->gc_timer.function = neigh_periodic_timer;
1378 tbl->gc_timer.expires = now + 1; 1409 tbl->gc_timer.expires = now + 1;
1379 add_timer(&tbl->gc_timer); 1410 add_timer(&tbl->gc_timer);
1380 1411
1381 init_timer(&tbl->proxy_timer); 1412 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1382 tbl->proxy_timer.data = (unsigned long)tbl;
1383 tbl->proxy_timer.function = neigh_proxy_process;
1384 skb_queue_head_init_class(&tbl->proxy_queue, 1413 skb_queue_head_init_class(&tbl->proxy_queue,
1385 &neigh_table_proxy_queue_class); 1414 &neigh_table_proxy_queue_class);
1386 1415
@@ -1435,6 +1464,8 @@ int neigh_table_clear(struct neigh_table *tbl)
1435 kfree(tbl->phash_buckets); 1464 kfree(tbl->phash_buckets);
1436 tbl->phash_buckets = NULL; 1465 tbl->phash_buckets = NULL;
1437 1466
1467 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1468
1438 free_percpu(tbl->stats); 1469 free_percpu(tbl->stats);
1439 tbl->stats = NULL; 1470 tbl->stats = NULL;
1440 1471
@@ -1481,7 +1512,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1481 goto out_dev_put; 1512 goto out_dev_put;
1482 1513
1483 if (ndm->ndm_flags & NTF_PROXY) { 1514 if (ndm->ndm_flags & NTF_PROXY) {
1484 err = pneigh_delete(tbl, nla_data(dst_attr), dev); 1515 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1485 goto out_dev_put; 1516 goto out_dev_put;
1486 } 1517 }
1487 1518
@@ -1558,7 +1589,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1558 struct pneigh_entry *pn; 1589 struct pneigh_entry *pn;
1559 1590
1560 err = -ENOBUFS; 1591 err = -ENOBUFS;
1561 pn = pneigh_lookup(tbl, dst, dev, 1); 1592 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1562 if (pn) { 1593 if (pn) {
1563 pn->flags = ndm->ndm_flags; 1594 pn->flags = ndm->ndm_flags;
1564 err = 0; 1595 err = 0;
@@ -1753,19 +1784,6 @@ errout:
1753 return -EMSGSIZE; 1784 return -EMSGSIZE;
1754} 1785}
1755 1786
1756static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1757 int ifindex)
1758{
1759 struct neigh_parms *p;
1760
1761 for (p = &tbl->parms; p; p = p->next)
1762 if ((p->dev && p->dev->ifindex == ifindex) ||
1763 (!p->dev && !ifindex))
1764 return p;
1765
1766 return NULL;
1767}
1768
1769static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { 1787static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1770 [NDTA_NAME] = { .type = NLA_STRING }, 1788 [NDTA_NAME] = { .type = NLA_STRING },
1771 [NDTA_THRESH1] = { .type = NLA_U32 }, 1789 [NDTA_THRESH1] = { .type = NLA_U32 },
@@ -1793,6 +1811,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1793 1811
1794static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1812static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1795{ 1813{
1814 struct net *net = skb->sk->sk_net;
1796 struct neigh_table *tbl; 1815 struct neigh_table *tbl;
1797 struct ndtmsg *ndtmsg; 1816 struct ndtmsg *ndtmsg;
1798 struct nlattr *tb[NDTA_MAX+1]; 1817 struct nlattr *tb[NDTA_MAX+1];
@@ -1842,7 +1861,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1842 if (tbp[NDTPA_IFINDEX]) 1861 if (tbp[NDTPA_IFINDEX])
1843 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); 1862 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1844 1863
1845 p = lookup_neigh_params(tbl, ifindex); 1864 p = lookup_neigh_params(tbl, net, ifindex);
1846 if (p == NULL) { 1865 if (p == NULL) {
1847 err = -ENOENT; 1866 err = -ENOENT;
1848 goto errout_tbl_lock; 1867 goto errout_tbl_lock;
@@ -1917,6 +1936,7 @@ errout:
1917 1936
1918static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 1937static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1919{ 1938{
1939 struct net *net = skb->sk->sk_net;
1920 int family, tidx, nidx = 0; 1940 int family, tidx, nidx = 0;
1921 int tbl_skip = cb->args[0]; 1941 int tbl_skip = cb->args[0];
1922 int neigh_skip = cb->args[1]; 1942 int neigh_skip = cb->args[1];
@@ -1936,8 +1956,11 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1936 NLM_F_MULTI) <= 0) 1956 NLM_F_MULTI) <= 0)
1937 break; 1957 break;
1938 1958
1939 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { 1959 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
1940 if (nidx < neigh_skip) 1960 if (net != p->net)
1961 continue;
1962
1963 if (nidx++ < neigh_skip)
1941 continue; 1964 continue;
1942 1965
1943 if (neightbl_fill_param_info(skb, tbl, p, 1966 if (neightbl_fill_param_info(skb, tbl, p,
@@ -2013,6 +2036,7 @@ static void neigh_update_notify(struct neighbour *neigh)
2013static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2036static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2014 struct netlink_callback *cb) 2037 struct netlink_callback *cb)
2015{ 2038{
2039 struct net * net = skb->sk->sk_net;
2016 struct neighbour *n; 2040 struct neighbour *n;
2017 int rc, h, s_h = cb->args[1]; 2041 int rc, h, s_h = cb->args[1];
2018 int idx, s_idx = idx = cb->args[2]; 2042 int idx, s_idx = idx = cb->args[2];
@@ -2023,8 +2047,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2023 continue; 2047 continue;
2024 if (h > s_h) 2048 if (h > s_h)
2025 s_idx = 0; 2049 s_idx = 0;
2026 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { 2050 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2027 if (idx < s_idx) 2051 int lidx;
2052 if (n->dev->nd_net != net)
2053 continue;
2054 lidx = idx++;
2055 if (lidx < s_idx)
2028 continue; 2056 continue;
2029 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, 2057 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2030 cb->nlh->nlmsg_seq, 2058 cb->nlh->nlmsg_seq,
@@ -2116,6 +2144,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
2116static struct neighbour *neigh_get_first(struct seq_file *seq) 2144static struct neighbour *neigh_get_first(struct seq_file *seq)
2117{ 2145{
2118 struct neigh_seq_state *state = seq->private; 2146 struct neigh_seq_state *state = seq->private;
2147 struct net *net = state->p.net;
2119 struct neigh_table *tbl = state->tbl; 2148 struct neigh_table *tbl = state->tbl;
2120 struct neighbour *n = NULL; 2149 struct neighbour *n = NULL;
2121 int bucket = state->bucket; 2150 int bucket = state->bucket;
@@ -2125,6 +2154,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2125 n = tbl->hash_buckets[bucket]; 2154 n = tbl->hash_buckets[bucket];
2126 2155
2127 while (n) { 2156 while (n) {
2157 if (n->dev->nd_net != net)
2158 goto next;
2128 if (state->neigh_sub_iter) { 2159 if (state->neigh_sub_iter) {
2129 loff_t fakep = 0; 2160 loff_t fakep = 0;
2130 void *v; 2161 void *v;
@@ -2154,6 +2185,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2154 loff_t *pos) 2185 loff_t *pos)
2155{ 2186{
2156 struct neigh_seq_state *state = seq->private; 2187 struct neigh_seq_state *state = seq->private;
2188 struct net *net = state->p.net;
2157 struct neigh_table *tbl = state->tbl; 2189 struct neigh_table *tbl = state->tbl;
2158 2190
2159 if (state->neigh_sub_iter) { 2191 if (state->neigh_sub_iter) {
@@ -2165,6 +2197,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2165 2197
2166 while (1) { 2198 while (1) {
2167 while (n) { 2199 while (n) {
2200 if (n->dev->nd_net != net)
2201 goto next;
2168 if (state->neigh_sub_iter) { 2202 if (state->neigh_sub_iter) {
2169 void *v = state->neigh_sub_iter(state, n, pos); 2203 void *v = state->neigh_sub_iter(state, n, pos);
2170 if (v) 2204 if (v)
@@ -2211,6 +2245,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2211static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 2245static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2212{ 2246{
2213 struct neigh_seq_state *state = seq->private; 2247 struct neigh_seq_state *state = seq->private;
2248 struct net * net = state->p.net;
2214 struct neigh_table *tbl = state->tbl; 2249 struct neigh_table *tbl = state->tbl;
2215 struct pneigh_entry *pn = NULL; 2250 struct pneigh_entry *pn = NULL;
2216 int bucket = state->bucket; 2251 int bucket = state->bucket;
@@ -2218,6 +2253,8 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2218 state->flags |= NEIGH_SEQ_IS_PNEIGH; 2253 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2219 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 2254 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2220 pn = tbl->phash_buckets[bucket]; 2255 pn = tbl->phash_buckets[bucket];
2256 while (pn && (pn->net != net))
2257 pn = pn->next;
2221 if (pn) 2258 if (pn)
2222 break; 2259 break;
2223 } 2260 }
@@ -2231,6 +2268,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2231 loff_t *pos) 2268 loff_t *pos)
2232{ 2269{
2233 struct neigh_seq_state *state = seq->private; 2270 struct neigh_seq_state *state = seq->private;
2271 struct net * net = state->p.net;
2234 struct neigh_table *tbl = state->tbl; 2272 struct neigh_table *tbl = state->tbl;
2235 2273
2236 pn = pn->next; 2274 pn = pn->next;
@@ -2238,6 +2276,8 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2238 if (++state->bucket > PNEIGH_HASHMASK) 2276 if (++state->bucket > PNEIGH_HASHMASK)
2239 break; 2277 break;
2240 pn = tbl->phash_buckets[state->bucket]; 2278 pn = tbl->phash_buckets[state->bucket];
2279 while (pn && (pn->net != net))
2280 pn = pn->next;
2241 if (pn) 2281 if (pn)
2242 break; 2282 break;
2243 } 2283 }
@@ -2275,6 +2315,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2275} 2315}
2276 2316
2277void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2317void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2318 __acquires(tbl->lock)
2278{ 2319{
2279 struct neigh_seq_state *state = seq->private; 2320 struct neigh_seq_state *state = seq->private;
2280 loff_t pos_minus_one; 2321 loff_t pos_minus_one;
@@ -2318,6 +2359,7 @@ out:
2318EXPORT_SYMBOL(neigh_seq_next); 2359EXPORT_SYMBOL(neigh_seq_next);
2319 2360
2320void neigh_seq_stop(struct seq_file *seq, void *v) 2361void neigh_seq_stop(struct seq_file *seq, void *v)
2362 __releases(tbl->lock)
2321{ 2363{
2322 struct neigh_seq_state *state = seq->private; 2364 struct neigh_seq_state *state = seq->private;
2323 struct neigh_table *tbl = state->tbl; 2365 struct neigh_table *tbl = state->tbl;
@@ -2439,6 +2481,7 @@ static inline size_t neigh_nlmsg_size(void)
2439 2481
2440static void __neigh_notify(struct neighbour *n, int type, int flags) 2482static void __neigh_notify(struct neighbour *n, int type, int flags)
2441{ 2483{
2484 struct net *net = n->dev->nd_net;
2442 struct sk_buff *skb; 2485 struct sk_buff *skb;
2443 int err = -ENOBUFS; 2486 int err = -ENOBUFS;
2444 2487
@@ -2453,10 +2496,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
2453 kfree_skb(skb); 2496 kfree_skb(skb);
2454 goto errout; 2497 goto errout;
2455 } 2498 }
2456 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 2499 err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2457errout: 2500errout:
2458 if (err < 0) 2501 if (err < 0)
2459 rtnl_set_sk_err(RTNLGRP_NEIGH, err); 2502 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2460} 2503}
2461 2504
2462#ifdef CONFIG_ARPD 2505#ifdef CONFIG_ARPD
@@ -2470,11 +2513,8 @@ void neigh_app_ns(struct neighbour *n)
2470 2513
2471static struct neigh_sysctl_table { 2514static struct neigh_sysctl_table {
2472 struct ctl_table_header *sysctl_header; 2515 struct ctl_table_header *sysctl_header;
2473 ctl_table neigh_vars[__NET_NEIGH_MAX]; 2516 struct ctl_table neigh_vars[__NET_NEIGH_MAX];
2474 ctl_table neigh_dev[2]; 2517 char *dev_name;
2475 ctl_table neigh_neigh_dir[2];
2476 ctl_table neigh_proto_dir[2];
2477 ctl_table neigh_root_dir[2];
2478} neigh_sysctl_template __read_mostly = { 2518} neigh_sysctl_template __read_mostly = {
2479 .neigh_vars = { 2519 .neigh_vars = {
2480 { 2520 {
@@ -2605,32 +2645,7 @@ static struct neigh_sysctl_table {
2605 .mode = 0644, 2645 .mode = 0644,
2606 .proc_handler = &proc_dointvec, 2646 .proc_handler = &proc_dointvec,
2607 }, 2647 },
2608 {} 2648 {},
2609 },
2610 .neigh_dev = {
2611 {
2612 .ctl_name = NET_PROTO_CONF_DEFAULT,
2613 .procname = "default",
2614 .mode = 0555,
2615 },
2616 },
2617 .neigh_neigh_dir = {
2618 {
2619 .procname = "neigh",
2620 .mode = 0555,
2621 },
2622 },
2623 .neigh_proto_dir = {
2624 {
2625 .mode = 0555,
2626 },
2627 },
2628 .neigh_root_dir = {
2629 {
2630 .ctl_name = CTL_NET,
2631 .procname = "net",
2632 .mode = 0555,
2633 },
2634 }, 2649 },
2635}; 2650};
2636 2651
@@ -2638,14 +2653,26 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2638 int p_id, int pdev_id, char *p_name, 2653 int p_id, int pdev_id, char *p_name,
2639 proc_handler *handler, ctl_handler *strategy) 2654 proc_handler *handler, ctl_handler *strategy)
2640{ 2655{
2641 struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, 2656 struct neigh_sysctl_table *t;
2642 sizeof(*t), GFP_KERNEL);
2643 const char *dev_name_source = NULL; 2657 const char *dev_name_source = NULL;
2644 char *dev_name = NULL;
2645 int err = 0;
2646 2658
2659#define NEIGH_CTL_PATH_ROOT 0
2660#define NEIGH_CTL_PATH_PROTO 1
2661#define NEIGH_CTL_PATH_NEIGH 2
2662#define NEIGH_CTL_PATH_DEV 3
2663
2664 struct ctl_path neigh_path[] = {
2665 { .procname = "net", .ctl_name = CTL_NET, },
2666 { .procname = "proto", .ctl_name = 0, },
2667 { .procname = "neigh", .ctl_name = 0, },
2668 { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
2669 { },
2670 };
2671
2672 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2647 if (!t) 2673 if (!t)
2648 return -ENOBUFS; 2674 goto err;
2675
2649 t->neigh_vars[0].data = &p->mcast_probes; 2676 t->neigh_vars[0].data = &p->mcast_probes;
2650 t->neigh_vars[1].data = &p->ucast_probes; 2677 t->neigh_vars[1].data = &p->ucast_probes;
2651 t->neigh_vars[2].data = &p->app_probes; 2678 t->neigh_vars[2].data = &p->app_probes;
@@ -2663,11 +2690,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2663 2690
2664 if (dev) { 2691 if (dev) {
2665 dev_name_source = dev->name; 2692 dev_name_source = dev->name;
2666 t->neigh_dev[0].ctl_name = dev->ifindex; 2693 neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
2667 /* Terminate the table early */ 2694 /* Terminate the table early */
2668 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); 2695 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2669 } else { 2696 } else {
2670 dev_name_source = t->neigh_dev[0].procname; 2697 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2671 t->neigh_vars[14].data = (int *)(p + 1); 2698 t->neigh_vars[14].data = (int *)(p + 1);
2672 t->neigh_vars[15].data = (int *)(p + 1) + 1; 2699 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2673 t->neigh_vars[16].data = (int *)(p + 1) + 2; 2700 t->neigh_vars[16].data = (int *)(p + 1) + 2;
@@ -2702,39 +2729,28 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2702 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; 2729 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2703 } 2730 }
2704 2731
2705 dev_name = kstrdup(dev_name_source, GFP_KERNEL); 2732 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2706 if (!dev_name) { 2733 if (!t->dev_name)
2707 err = -ENOBUFS;
2708 goto free; 2734 goto free;
2709 }
2710
2711 t->neigh_dev[0].procname = dev_name;
2712
2713 t->neigh_neigh_dir[0].ctl_name = pdev_id;
2714 2735
2715 t->neigh_proto_dir[0].procname = p_name; 2736 neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2716 t->neigh_proto_dir[0].ctl_name = p_id; 2737 neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
2738 neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2739 neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
2717 2740
2718 t->neigh_dev[0].child = t->neigh_vars; 2741 t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
2719 t->neigh_neigh_dir[0].child = t->neigh_dev; 2742 if (!t->sysctl_header)
2720 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
2721 t->neigh_root_dir[0].child = t->neigh_proto_dir;
2722
2723 t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
2724 if (!t->sysctl_header) {
2725 err = -ENOBUFS;
2726 goto free_procname; 2743 goto free_procname;
2727 } 2744
2728 p->sysctl_table = t; 2745 p->sysctl_table = t;
2729 return 0; 2746 return 0;
2730 2747
2731 /* error path */ 2748free_procname:
2732 free_procname: 2749 kfree(t->dev_name);
2733 kfree(dev_name); 2750free:
2734 free:
2735 kfree(t); 2751 kfree(t);
2736 2752err:
2737 return err; 2753 return -ENOBUFS;
2738} 2754}
2739 2755
2740void neigh_sysctl_unregister(struct neigh_parms *p) 2756void neigh_sysctl_unregister(struct neigh_parms *p)
@@ -2743,7 +2759,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
2743 struct neigh_sysctl_table *t = p->sysctl_table; 2759 struct neigh_sysctl_table *t = p->sysctl_table;
2744 p->sysctl_table = NULL; 2760 p->sysctl_table = NULL;
2745 unregister_sysctl_table(t->sysctl_header); 2761 unregister_sysctl_table(t->sysctl_header);
2746 kfree(t->neigh_dev[0].procname); 2762 kfree(t->dev_name);
2747 kfree(t); 2763 kfree(t);
2748 } 2764 }
2749} 2765}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 61ead1d11132..7635d3f72723 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec);
95NETDEVICE_SHOW(link_mode, fmt_dec); 95NETDEVICE_SHOW(link_mode, fmt_dec);
96 96
97/* use same locking rules as GIFHWADDR ioctl's */ 97/* use same locking rules as GIFHWADDR ioctl's */
98static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
99{
100 int i;
101 char *cp = buf;
102
103 for (i = 0; i < len; i++)
104 cp += sprintf(cp, "%02x%c", addr[i],
105 i == (len - 1) ? '\n' : ':');
106 return cp - buf;
107}
108
109static ssize_t show_address(struct device *dev, struct device_attribute *attr, 98static ssize_t show_address(struct device *dev, struct device_attribute *attr,
110 char *buf) 99 char *buf)
111{ 100{
@@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
114 103
115 read_lock(&dev_base_lock); 104 read_lock(&dev_base_lock);
116 if (dev_isalive(net)) 105 if (dev_isalive(net))
117 ret = format_addr(buf, net->dev_addr, net->addr_len); 106 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
118 read_unlock(&dev_base_lock); 107 read_unlock(&dev_base_lock);
119 return ret; 108 return ret;
120} 109}
@@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev,
124{ 113{
125 struct net_device *net = to_net_dev(dev); 114 struct net_device *net = to_net_dev(dev);
126 if (dev_isalive(net)) 115 if (dev_isalive(net))
127 return format_addr(buf, net->broadcast, net->addr_len); 116 return sysfs_format_mac(buf, net->broadcast, net->addr_len);
128 return -EINVAL; 117 return -EINVAL;
129} 118}
130 119
@@ -247,9 +236,8 @@ static ssize_t netstat_show(const struct device *d,
247 struct net_device_stats *stats; 236 struct net_device_stats *stats;
248 ssize_t ret = -EINVAL; 237 ssize_t ret = -EINVAL;
249 238
250 if (offset > sizeof(struct net_device_stats) || 239 WARN_ON(offset > sizeof(struct net_device_stats) ||
251 offset % sizeof(unsigned long) != 0) 240 offset % sizeof(unsigned long) != 0);
252 WARN_ON(1);
253 241
254 read_lock(&dev_base_lock); 242 read_lock(&dev_base_lock);
255 if (dev_isalive(dev) && dev->get_stats && 243 if (dev_isalive(dev) && dev->get_stats &&
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6f71db8c4428..26e941d912e8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -17,74 +17,13 @@ static DEFINE_MUTEX(net_mutex);
17 17
18LIST_HEAD(net_namespace_list); 18LIST_HEAD(net_namespace_list);
19 19
20static struct kmem_cache *net_cachep;
21
22struct net init_net; 20struct net init_net;
23EXPORT_SYMBOL_GPL(init_net); 21EXPORT_SYMBOL(init_net);
24
25static struct net *net_alloc(void)
26{
27 return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
28}
29
30static void net_free(struct net *net)
31{
32 if (!net)
33 return;
34
35 if (unlikely(atomic_read(&net->use_count) != 0)) {
36 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
37 atomic_read(&net->use_count));
38 return;
39 }
40
41 kmem_cache_free(net_cachep, net);
42}
43
44static void cleanup_net(struct work_struct *work)
45{
46 struct pernet_operations *ops;
47 struct net *net;
48
49 net = container_of(work, struct net, work);
50
51 mutex_lock(&net_mutex);
52
53 /* Don't let anyone else find us. */
54 rtnl_lock();
55 list_del(&net->list);
56 rtnl_unlock();
57
58 /* Run all of the network namespace exit methods */
59 list_for_each_entry_reverse(ops, &pernet_list, list) {
60 if (ops->exit)
61 ops->exit(net);
62 }
63
64 mutex_unlock(&net_mutex);
65
66 /* Ensure there are no outstanding rcu callbacks using this
67 * network namespace.
68 */
69 rcu_barrier();
70
71 /* Finally it is safe to free my network namespace structure */
72 net_free(net);
73}
74
75
76void __put_net(struct net *net)
77{
78 /* Cleanup the network namespace in process context */
79 INIT_WORK(&net->work, cleanup_net);
80 schedule_work(&net->work);
81}
82EXPORT_SYMBOL_GPL(__put_net);
83 22
84/* 23/*
85 * setup_net runs the initializers for the network namespace object. 24 * setup_net runs the initializers for the network namespace object.
86 */ 25 */
87static int setup_net(struct net *net) 26static __net_init int setup_net(struct net *net)
88{ 27{
89 /* Must be called with net_mutex held */ 28 /* Must be called with net_mutex held */
90 struct pernet_operations *ops; 29 struct pernet_operations *ops;
@@ -112,9 +51,34 @@ out_undo:
112 if (ops->exit) 51 if (ops->exit)
113 ops->exit(net); 52 ops->exit(net);
114 } 53 }
54
55 rcu_barrier();
115 goto out; 56 goto out;
116} 57}
117 58
59#ifdef CONFIG_NET_NS
60static struct kmem_cache *net_cachep;
61static struct workqueue_struct *netns_wq;
62
63static struct net *net_alloc(void)
64{
65 return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
66}
67
68static void net_free(struct net *net)
69{
70 if (!net)
71 return;
72
73 if (unlikely(atomic_read(&net->use_count) != 0)) {
74 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
75 atomic_read(&net->use_count));
76 return;
77 }
78
79 kmem_cache_free(net_cachep, net);
80}
81
118struct net *copy_net_ns(unsigned long flags, struct net *old_net) 82struct net *copy_net_ns(unsigned long flags, struct net *old_net)
119{ 83{
120 struct net *new_net = NULL; 84 struct net *new_net = NULL;
@@ -125,10 +89,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
125 if (!(flags & CLONE_NEWNET)) 89 if (!(flags & CLONE_NEWNET))
126 return old_net; 90 return old_net;
127 91
128#ifndef CONFIG_NET_NS
129 return ERR_PTR(-EINVAL);
130#endif
131
132 err = -ENOMEM; 92 err = -ENOMEM;
133 new_net = net_alloc(); 93 new_net = net_alloc();
134 if (!new_net) 94 if (!new_net)
@@ -155,14 +115,70 @@ out:
155 return new_net; 115 return new_net;
156} 116}
157 117
118static void cleanup_net(struct work_struct *work)
119{
120 struct pernet_operations *ops;
121 struct net *net;
122
123 net = container_of(work, struct net, work);
124
125 mutex_lock(&net_mutex);
126
127 /* Don't let anyone else find us. */
128 rtnl_lock();
129 list_del(&net->list);
130 rtnl_unlock();
131
132 /* Run all of the network namespace exit methods */
133 list_for_each_entry_reverse(ops, &pernet_list, list) {
134 if (ops->exit)
135 ops->exit(net);
136 }
137
138 mutex_unlock(&net_mutex);
139
140 /* Ensure there are no outstanding rcu callbacks using this
141 * network namespace.
142 */
143 rcu_barrier();
144
145 /* Finally it is safe to free my network namespace structure */
146 net_free(net);
147}
148
149void __put_net(struct net *net)
150{
151 /* Cleanup the network namespace in process context */
152 INIT_WORK(&net->work, cleanup_net);
153 queue_work(netns_wq, &net->work);
154}
155EXPORT_SYMBOL_GPL(__put_net);
156
157#else
158struct net *copy_net_ns(unsigned long flags, struct net *old_net)
159{
160 if (flags & CLONE_NEWNET)
161 return ERR_PTR(-EINVAL);
162 return old_net;
163}
164#endif
165
158static int __init net_ns_init(void) 166static int __init net_ns_init(void)
159{ 167{
160 int err; 168 int err;
161 169
162 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); 170 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
171#ifdef CONFIG_NET_NS
163 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 172 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
164 SMP_CACHE_BYTES, 173 SMP_CACHE_BYTES,
165 SLAB_PANIC, NULL); 174 SLAB_PANIC, NULL);
175
176 /* Create workqueue for cleanup */
177 netns_wq = create_singlethread_workqueue("netns");
178 if (!netns_wq)
179 panic("Could not create netns workq");
180#endif
181
166 mutex_lock(&net_mutex); 182 mutex_lock(&net_mutex);
167 err = setup_net(&init_net); 183 err = setup_net(&init_net);
168 184
@@ -179,35 +195,35 @@ static int __init net_ns_init(void)
179 195
180pure_initcall(net_ns_init); 196pure_initcall(net_ns_init);
181 197
198#ifdef CONFIG_NET_NS
182static int register_pernet_operations(struct list_head *list, 199static int register_pernet_operations(struct list_head *list,
183 struct pernet_operations *ops) 200 struct pernet_operations *ops)
184{ 201{
185 struct net *net, *undo_net; 202 struct net *net, *undo_net;
186 int error; 203 int error;
187 204
188 error = 0;
189 list_add_tail(&ops->list, list); 205 list_add_tail(&ops->list, list);
190 for_each_net(net) { 206 if (ops->init) {
191 if (ops->init) { 207 for_each_net(net) {
192 error = ops->init(net); 208 error = ops->init(net);
193 if (error) 209 if (error)
194 goto out_undo; 210 goto out_undo;
195 } 211 }
196 } 212 }
197out: 213 return 0;
198 return error;
199 214
200out_undo: 215out_undo:
201 /* If I have an error cleanup all namespaces I initialized */ 216 /* If I have an error cleanup all namespaces I initialized */
202 list_del(&ops->list); 217 list_del(&ops->list);
203 for_each_net(undo_net) { 218 if (ops->exit) {
204 if (undo_net == net) 219 for_each_net(undo_net) {
205 goto undone; 220 if (undo_net == net)
206 if (ops->exit) 221 goto undone;
207 ops->exit(undo_net); 222 ops->exit(undo_net);
223 }
208 } 224 }
209undone: 225undone:
210 goto out; 226 return error;
211} 227}
212 228
213static void unregister_pernet_operations(struct pernet_operations *ops) 229static void unregister_pernet_operations(struct pernet_operations *ops)
@@ -215,11 +231,28 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
215 struct net *net; 231 struct net *net;
216 232
217 list_del(&ops->list); 233 list_del(&ops->list);
218 for_each_net(net) 234 if (ops->exit)
219 if (ops->exit) 235 for_each_net(net)
220 ops->exit(net); 236 ops->exit(net);
221} 237}
222 238
239#else
240
241static int register_pernet_operations(struct list_head *list,
242 struct pernet_operations *ops)
243{
244 if (ops->init == NULL)
245 return 0;
246 return ops->init(&init_net);
247}
248
249static void unregister_pernet_operations(struct pernet_operations *ops)
250{
251 if (ops->exit)
252 ops->exit(&init_net);
253}
254#endif
255
223/** 256/**
224 * register_pernet_subsys - register a network namespace subsystem 257 * register_pernet_subsys - register a network namespace subsystem
225 * @ops: pernet operations structure for the subsystem 258 * @ops: pernet operations structure for the subsystem
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index bf8d18f1b013..6faa128a4c8e 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -39,8 +39,6 @@ static struct sk_buff_head skb_pool;
39static atomic_t trapped; 39static atomic_t trapped;
40 40
41#define USEC_PER_POLL 50 41#define USEC_PER_POLL 50
42#define NETPOLL_RX_ENABLED 1
43#define NETPOLL_RX_DROP 2
44 42
45#define MAX_SKB_SIZE \ 43#define MAX_SKB_SIZE \
46 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ 44 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -116,54 +114,67 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
116 * network adapter, forcing superfluous retries and possibly timeouts. 114 * network adapter, forcing superfluous retries and possibly timeouts.
117 * Thus, we set our budget to greater than 1. 115 * Thus, we set our budget to greater than 1.
118 */ 116 */
119static void poll_napi(struct netpoll *np) 117static int poll_one_napi(struct netpoll_info *npinfo,
118 struct napi_struct *napi, int budget)
119{
120 int work;
121
122 /* net_rx_action's ->poll() invocations and our's are
123 * synchronized by this test which is only made while
124 * holding the napi->poll_lock.
125 */
126 if (!test_bit(NAPI_STATE_SCHED, &napi->state))
127 return budget;
128
129 atomic_inc(&trapped);
130
131 work = napi->poll(napi, budget);
132
133 atomic_dec(&trapped);
134
135 return budget - work;
136}
137
138static void poll_napi(struct net_device *dev)
120{ 139{
121 struct netpoll_info *npinfo = np->dev->npinfo;
122 struct napi_struct *napi; 140 struct napi_struct *napi;
123 int budget = 16; 141 int budget = 16;
124 142
125 list_for_each_entry(napi, &np->dev->napi_list, dev_list) { 143 list_for_each_entry(napi, &dev->napi_list, dev_list) {
126 if (test_bit(NAPI_STATE_SCHED, &napi->state) && 144 if (napi->poll_owner != smp_processor_id() &&
127 napi->poll_owner != smp_processor_id() &&
128 spin_trylock(&napi->poll_lock)) { 145 spin_trylock(&napi->poll_lock)) {
129 npinfo->rx_flags |= NETPOLL_RX_DROP; 146 budget = poll_one_napi(dev->npinfo, napi, budget);
130 atomic_inc(&trapped);
131
132 napi->poll(napi, budget);
133
134 atomic_dec(&trapped);
135 npinfo->rx_flags &= ~NETPOLL_RX_DROP;
136 spin_unlock(&napi->poll_lock); 147 spin_unlock(&napi->poll_lock);
148
149 if (!budget)
150 break;
137 } 151 }
138 } 152 }
139} 153}
140 154
141static void service_arp_queue(struct netpoll_info *npi) 155static void service_arp_queue(struct netpoll_info *npi)
142{ 156{
143 struct sk_buff *skb; 157 if (npi) {
144 158 struct sk_buff *skb;
145 if (unlikely(!npi))
146 return;
147 159
148 skb = skb_dequeue(&npi->arp_tx); 160 while ((skb = skb_dequeue(&npi->arp_tx)))
149 161 arp_reply(skb);
150 while (skb != NULL) {
151 arp_reply(skb);
152 skb = skb_dequeue(&npi->arp_tx);
153 } 162 }
154} 163}
155 164
156void netpoll_poll(struct netpoll *np) 165void netpoll_poll(struct netpoll *np)
157{ 166{
158 if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) 167 struct net_device *dev = np->dev;
168
169 if (!dev || !netif_running(dev) || !dev->poll_controller)
159 return; 170 return;
160 171
161 /* Process pending work on NIC */ 172 /* Process pending work on NIC */
162 np->dev->poll_controller(np->dev); 173 dev->poll_controller(dev);
163 if (!list_empty(&np->dev->napi_list))
164 poll_napi(np);
165 174
166 service_arp_queue(np->dev->npinfo); 175 poll_napi(dev);
176
177 service_arp_queue(dev->npinfo);
167 178
168 zap_completion_queue(); 179 zap_completion_queue();
169} 180}
@@ -345,8 +356,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
345 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 356 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
346 skb_reset_mac_header(skb); 357 skb_reset_mac_header(skb);
347 skb->protocol = eth->h_proto = htons(ETH_P_IP); 358 skb->protocol = eth->h_proto = htons(ETH_P_IP);
348 memcpy(eth->h_source, np->local_mac, 6); 359 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
349 memcpy(eth->h_dest, np->remote_mac, 6); 360 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
350 361
351 skb->dev = np->dev; 362 skb->dev = np->dev;
352 363
@@ -399,7 +410,8 @@ static void arp_reply(struct sk_buff *skb)
399 memcpy(&tip, arp_ptr, 4); 410 memcpy(&tip, arp_ptr, 4);
400 411
401 /* Should we ignore arp? */ 412 /* Should we ignore arp? */
402 if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) 413 if (tip != htonl(np->local_ip) ||
414 ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
403 return; 415 return;
404 416
405 size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); 417 size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
@@ -416,7 +428,7 @@ static void arp_reply(struct sk_buff *skb)
416 428
417 /* Fill the device header for the ARP frame */ 429 /* Fill the device header for the ARP frame */
418 if (dev_hard_header(send_skb, skb->dev, ptype, 430 if (dev_hard_header(send_skb, skb->dev, ptype,
419 sha, np->local_mac, 431 sha, np->dev->dev_addr,
420 send_skb->len) < 0) { 432 send_skb->len) < 0) {
421 kfree_skb(send_skb); 433 kfree_skb(send_skb);
422 return; 434 return;
@@ -460,7 +472,7 @@ int __netpoll_rx(struct sk_buff *skb)
460 if (skb->dev->type != ARPHRD_ETHER) 472 if (skb->dev->type != ARPHRD_ETHER)
461 goto out; 473 goto out;
462 474
463 /* check if netpoll clients need ARP */ 475 /* if receive ARP during middle of NAPI poll, then queue */
464 if (skb->protocol == htons(ETH_P_ARP) && 476 if (skb->protocol == htons(ETH_P_ARP) &&
465 atomic_read(&trapped)) { 477 atomic_read(&trapped)) {
466 skb_queue_tail(&npi->arp_tx, skb); 478 skb_queue_tail(&npi->arp_tx, skb);
@@ -522,6 +534,9 @@ int __netpoll_rx(struct sk_buff *skb)
522 return 1; 534 return 1;
523 535
524out: 536out:
537 /* If packet received while already in poll then just
538 * silently drop.
539 */
525 if (atomic_read(&trapped)) { 540 if (atomic_read(&trapped)) {
526 kfree_skb(skb); 541 kfree_skb(skb);
527 return 1; 542 return 1;
@@ -660,7 +675,6 @@ int netpoll_setup(struct netpoll *np)
660 goto release; 675 goto release;
661 } 676 }
662 677
663 npinfo->rx_flags = 0;
664 npinfo->rx_np = NULL; 678 npinfo->rx_np = NULL;
665 679
666 spin_lock_init(&npinfo->rx_lock); 680 spin_lock_init(&npinfo->rx_lock);
@@ -722,9 +736,6 @@ int netpoll_setup(struct netpoll *np)
722 } 736 }
723 } 737 }
724 738
725 if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
726 memcpy(np->local_mac, ndev->dev_addr, 6);
727
728 if (!np->local_ip) { 739 if (!np->local_ip) {
729 rcu_read_lock(); 740 rcu_read_lock();
730 in_dev = __in_dev_get_rcu(ndev); 741 in_dev = __in_dev_get_rcu(ndev);
@@ -745,7 +756,6 @@ int netpoll_setup(struct netpoll *np)
745 756
746 if (np->rx_hook) { 757 if (np->rx_hook) {
747 spin_lock_irqsave(&npinfo->rx_lock, flags); 758 spin_lock_irqsave(&npinfo->rx_lock, flags);
748 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
749 npinfo->rx_np = np; 759 npinfo->rx_np = np;
750 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 760 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
751 } 761 }
@@ -787,7 +797,6 @@ void netpoll_cleanup(struct netpoll *np)
787 if (npinfo->rx_np == np) { 797 if (npinfo->rx_np == np) {
788 spin_lock_irqsave(&npinfo->rx_lock, flags); 798 spin_lock_irqsave(&npinfo->rx_lock, flags);
789 npinfo->rx_np = NULL; 799 npinfo->rx_np = NULL;
790 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
791 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 800 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
792 } 801 }
793 802
@@ -797,11 +806,7 @@ void netpoll_cleanup(struct netpoll *np)
797 cancel_rearming_delayed_work(&npinfo->tx_work); 806 cancel_rearming_delayed_work(&npinfo->tx_work);
798 807
799 /* clean after last, unfinished work */ 808 /* clean after last, unfinished work */
800 if (!skb_queue_empty(&npinfo->txq)) { 809 __skb_queue_purge(&npinfo->txq);
801 struct sk_buff *skb;
802 skb = __skb_dequeue(&npinfo->txq);
803 kfree_skb(skb);
804 }
805 kfree(npinfo); 810 kfree(npinfo);
806 np->dev->npinfo = NULL; 811 np->dev->npinfo = NULL;
807 } 812 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index de33f36947e9..bfcdfaebca5c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -170,8 +170,6 @@
170 170
171#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" 171#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n"
172 172
173/* The buckets are exponential in 'width' */
174#define LAT_BUCKETS_MAX 32
175#define IP_NAME_SZ 32 173#define IP_NAME_SZ 32
176#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
177#define MPLS_STACK_BOTTOM htonl(0x00000100) 175#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -397,62 +395,6 @@ struct pktgen_thread {
397#define REMOVE 1 395#define REMOVE 1
398#define FIND 0 396#define FIND 0
399 397
400/* This code works around the fact that do_div cannot handle two 64-bit
401 numbers, and regular 64-bit division doesn't work on x86 kernels.
402 --Ben
403*/
404
405#define PG_DIV 0
406
407/* This was emailed to LMKL by: Chris Caputo <ccaputo@alt.net>
408 * Function copied/adapted/optimized from:
409 *
410 * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
411 *
412 * Copyright 1994, University of Cambridge Computer Laboratory
413 * All Rights Reserved.
414 *
415 */
416static inline s64 divremdi3(s64 x, s64 y, int type)
417{
418 u64 a = (x < 0) ? -x : x;
419 u64 b = (y < 0) ? -y : y;
420 u64 res = 0, d = 1;
421
422 if (b > 0) {
423 while (b < a) {
424 b <<= 1;
425 d <<= 1;
426 }
427 }
428
429 do {
430 if (a >= b) {
431 a -= b;
432 res += d;
433 }
434 b >>= 1;
435 d >>= 1;
436 }
437 while (d);
438
439 if (PG_DIV == type) {
440 return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res;
441 } else {
442 return ((x & (1ll << 63)) == 0) ? a : -(s64) a;
443 }
444}
445
446/* End of hacks to deal with 64-bit math on x86 */
447
448/** Convert to milliseconds */
449static inline __u64 tv_to_ms(const struct timeval *tv)
450{
451 __u64 ms = tv->tv_usec / 1000;
452 ms += (__u64) tv->tv_sec * (__u64) 1000;
453 return ms;
454}
455
456/** Convert to micro-seconds */ 398/** Convert to micro-seconds */
457static inline __u64 tv_to_us(const struct timeval *tv) 399static inline __u64 tv_to_us(const struct timeval *tv)
458{ 400{
@@ -461,51 +403,13 @@ static inline __u64 tv_to_us(const struct timeval *tv)
461 return us; 403 return us;
462} 404}
463 405
464static inline __u64 pg_div(__u64 n, __u32 base) 406static __u64 getCurUs(void)
465{
466 __u64 tmp = n;
467 do_div(tmp, base);
468 /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n",
469 n, base, tmp); */
470 return tmp;
471}
472
473static inline __u64 pg_div64(__u64 n, __u64 base)
474{
475 __u64 tmp = n;
476/*
477 * How do we know if the architecture we are running on
478 * supports division with 64 bit base?
479 *
480 */
481#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__)
482
483 do_div(tmp, base);
484#else
485 tmp = divremdi3(n, base, PG_DIV);
486#endif
487 return tmp;
488}
489
490static inline __u64 getCurMs(void)
491{
492 struct timeval tv;
493 do_gettimeofday(&tv);
494 return tv_to_ms(&tv);
495}
496
497static inline __u64 getCurUs(void)
498{ 407{
499 struct timeval tv; 408 struct timeval tv;
500 do_gettimeofday(&tv); 409 do_gettimeofday(&tv);
501 return tv_to_us(&tv); 410 return tv_to_us(&tv);
502} 411}
503 412
504static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b)
505{
506 return tv_to_us(a) - tv_to_us(b);
507}
508
509/* old include end */ 413/* old include end */
510 414
511static char version[] __initdata = VERSION; 415static char version[] __initdata = VERSION;
@@ -2138,7 +2042,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
2138 __u64 now; 2042 __u64 now;
2139 2043
2140 start = now = getCurUs(); 2044 start = now = getCurUs();
2141 printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now));
2142 while (now < spin_until_us) { 2045 while (now < spin_until_us) {
2143 /* TODO: optimize sleeping behavior */ 2046 /* TODO: optimize sleeping behavior */
2144 if (spin_until_us - now > jiffies_to_usecs(1) + 1) 2047 if (spin_until_us - now > jiffies_to_usecs(1) + 1)
@@ -2358,9 +2261,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2358 t = random32() % (imx - imn) + imn; 2261 t = random32() % (imx - imn) + imn;
2359 s = htonl(t); 2262 s = htonl(t);
2360 2263
2361 while (LOOPBACK(s) || MULTICAST(s) 2264 while (ipv4_is_loopback(s) ||
2362 || BADCLASS(s) || ZERONET(s) 2265 ipv4_is_multicast(s) ||
2363 || LOCAL_MCAST(s)) { 2266 ipv4_is_lbcast(s) ||
2267 ipv4_is_zeronet(s) ||
2268 ipv4_is_local_multicast(s)) {
2364 t = random32() % (imx - imn) + imn; 2269 t = random32() % (imx - imn) + imn;
2365 s = htonl(t); 2270 s = htonl(t);
2366 } 2271 }
@@ -2463,8 +2368,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2463 2368
2464 x->curlft.bytes +=skb->len; 2369 x->curlft.bytes +=skb->len;
2465 x->curlft.packets++; 2370 x->curlft.packets++;
2466 spin_unlock(&x->lock);
2467
2468error: 2371error:
2469 spin_unlock(&x->lock); 2372 spin_unlock(&x->lock);
2470 return err; 2373 return err;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 5f0818d815e6..2d3035d3abd7 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -69,7 +69,38 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
69 return 0; 69 return 0;
70} 70}
71 71
72EXPORT_SYMBOL(reqsk_queue_alloc); 72void __reqsk_queue_destroy(struct request_sock_queue *queue)
73{
74 struct listen_sock *lopt;
75 size_t lopt_size;
76
77 /*
78 * this is an error recovery path only
79 * no locking needed and the lopt is not NULL
80 */
81
82 lopt = queue->listen_opt;
83 lopt_size = sizeof(struct listen_sock) +
84 lopt->nr_table_entries * sizeof(struct request_sock *);
85
86 if (lopt_size > PAGE_SIZE)
87 vfree(lopt);
88 else
89 kfree(lopt);
90}
91
92static inline struct listen_sock *reqsk_queue_yank_listen_sk(
93 struct request_sock_queue *queue)
94{
95 struct listen_sock *lopt;
96
97 write_lock_bh(&queue->syn_wait_lock);
98 lopt = queue->listen_opt;
99 queue->listen_opt = NULL;
100 write_unlock_bh(&queue->syn_wait_lock);
101
102 return lopt;
103}
73 104
74void reqsk_queue_destroy(struct request_sock_queue *queue) 105void reqsk_queue_destroy(struct request_sock_queue *queue)
75{ 106{
@@ -99,4 +130,3 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
99 kfree(lopt); 130 kfree(lopt);
100} 131}
101 132
102EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e1ba26fb4bf2..ddbdde82a700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link
60}; 60};
61 61
62static DEFINE_MUTEX(rtnl_mutex); 62static DEFINE_MUTEX(rtnl_mutex);
63static struct sock *rtnl;
64 63
65void rtnl_lock(void) 64void rtnl_lock(void)
66{ 65{
@@ -308,9 +307,12 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
308 struct net *net; 307 struct net *net;
309 308
310 for_each_net(net) { 309 for_each_net(net) {
310restart:
311 for_each_netdev_safe(net, dev, n) { 311 for_each_netdev_safe(net, dev, n) {
312 if (dev->rtnl_link_ops == ops) 312 if (dev->rtnl_link_ops == ops) {
313 ops->dellink(dev); 313 ops->dellink(dev);
314 goto restart;
315 }
314 } 316 }
315 } 317 }
316 list_del(&ops->list); 318 list_del(&ops->list);
@@ -455,8 +457,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
455 return ret; 457 return ret;
456} 458}
457 459
458int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) 460int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
459{ 461{
462 struct sock *rtnl = net->rtnl;
460 int err = 0; 463 int err = 0;
461 464
462 NETLINK_CB(skb).dst_group = group; 465 NETLINK_CB(skb).dst_group = group;
@@ -468,14 +471,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
468 return err; 471 return err;
469} 472}
470 473
471int rtnl_unicast(struct sk_buff *skb, u32 pid) 474int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
472{ 475{
476 struct sock *rtnl = net->rtnl;
477
473 return nlmsg_unicast(rtnl, skb, pid); 478 return nlmsg_unicast(rtnl, skb, pid);
474} 479}
475 480
476int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, 481int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
477 struct nlmsghdr *nlh, gfp_t flags) 482 struct nlmsghdr *nlh, gfp_t flags)
478{ 483{
484 struct sock *rtnl = net->rtnl;
479 int report = 0; 485 int report = 0;
480 486
481 if (nlh) 487 if (nlh)
@@ -484,8 +490,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
484 return nlmsg_notify(rtnl, skb, pid, group, report, flags); 490 return nlmsg_notify(rtnl, skb, pid, group, report, flags);
485} 491}
486 492
487void rtnl_set_sk_err(u32 group, int error) 493void rtnl_set_sk_err(struct net *net, u32 group, int error)
488{ 494{
495 struct sock *rtnl = net->rtnl;
496
489 netlink_set_err(rtnl, 0, group, error); 497 netlink_set_err(rtnl, 0, group, error);
490} 498}
491 499
@@ -1183,7 +1191,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1183 kfree_skb(nskb); 1191 kfree_skb(nskb);
1184 goto errout; 1192 goto errout;
1185 } 1193 }
1186 err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); 1194 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1187errout: 1195errout:
1188 dev_put(dev); 1196 dev_put(dev);
1189 1197
@@ -1216,6 +1224,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1216 1224
1217void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) 1225void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1218{ 1226{
1227 struct net *net = dev->nd_net;
1219 struct sk_buff *skb; 1228 struct sk_buff *skb;
1220 int err = -ENOBUFS; 1229 int err = -ENOBUFS;
1221 1230
@@ -1230,10 +1239,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1230 kfree_skb(skb); 1239 kfree_skb(skb);
1231 goto errout; 1240 goto errout;
1232 } 1241 }
1233 err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); 1242 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
1234errout: 1243errout:
1235 if (err < 0) 1244 if (err < 0)
1236 rtnl_set_sk_err(RTNLGRP_LINK, err); 1245 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1237} 1246}
1238 1247
1239/* Protected by RTNL sempahore. */ 1248/* Protected by RTNL sempahore. */
@@ -1244,6 +1253,7 @@ static int rtattr_max;
1244 1253
1245static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 1254static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1246{ 1255{
1256 struct net *net = skb->sk->sk_net;
1247 rtnl_doit_func doit; 1257 rtnl_doit_func doit;
1248 int sz_idx, kind; 1258 int sz_idx, kind;
1249 int min_len; 1259 int min_len;
@@ -1272,6 +1282,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1272 return -EPERM; 1282 return -EPERM;
1273 1283
1274 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 1284 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
1285 struct sock *rtnl;
1275 rtnl_dumpit_func dumpit; 1286 rtnl_dumpit_func dumpit;
1276 1287
1277 dumpit = rtnl_get_dumpit(family, type); 1288 dumpit = rtnl_get_dumpit(family, type);
@@ -1279,6 +1290,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1279 return -EOPNOTSUPP; 1290 return -EOPNOTSUPP;
1280 1291
1281 __rtnl_unlock(); 1292 __rtnl_unlock();
1293 rtnl = net->rtnl;
1282 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); 1294 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
1283 rtnl_lock(); 1295 rtnl_lock();
1284 return err; 1296 return err;
@@ -1323,9 +1335,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
1323{ 1335{
1324 struct net_device *dev = ptr; 1336 struct net_device *dev = ptr;
1325 1337
1326 if (dev->nd_net != &init_net)
1327 return NOTIFY_DONE;
1328
1329 switch (event) { 1338 switch (event) {
1330 case NETDEV_UNREGISTER: 1339 case NETDEV_UNREGISTER:
1331 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 1340 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
@@ -1351,6 +1360,29 @@ static struct notifier_block rtnetlink_dev_notifier = {
1351 .notifier_call = rtnetlink_event, 1360 .notifier_call = rtnetlink_event,
1352}; 1361};
1353 1362
1363
1364static int rtnetlink_net_init(struct net *net)
1365{
1366 struct sock *sk;
1367 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
1368 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
1369 if (!sk)
1370 return -ENOMEM;
1371 net->rtnl = sk;
1372 return 0;
1373}
1374
1375static void rtnetlink_net_exit(struct net *net)
1376{
1377 netlink_kernel_release(net->rtnl);
1378 net->rtnl = NULL;
1379}
1380
1381static struct pernet_operations rtnetlink_net_ops = {
1382 .init = rtnetlink_net_init,
1383 .exit = rtnetlink_net_exit,
1384};
1385
1354void __init rtnetlink_init(void) 1386void __init rtnetlink_init(void)
1355{ 1387{
1356 int i; 1388 int i;
@@ -1363,10 +1395,9 @@ void __init rtnetlink_init(void)
1363 if (!rta_buf) 1395 if (!rta_buf)
1364 panic("rtnetlink_init: cannot allocate rta_buf\n"); 1396 panic("rtnetlink_init: cannot allocate rta_buf\n");
1365 1397
1366 rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, 1398 if (register_pernet_subsys(&rtnetlink_net_ops))
1367 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
1368 if (rtnl == NULL)
1369 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 1399 panic("rtnetlink_init: cannot initialize rtnetlink\n");
1400
1370 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); 1401 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
1371 register_netdevice_notifier(&rtnetlink_dev_notifier); 1402 register_netdevice_notifier(&rtnetlink_dev_notifier);
1372 1403
diff --git a/net/core/scm.c b/net/core/scm.c
index 100ba6d9d478..10f5c65f6a47 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -196,6 +196,8 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
196 if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) 196 if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
197 goto out; 197 goto out;
198 cmlen = CMSG_SPACE(len); 198 cmlen = CMSG_SPACE(len);
199 if (msg->msg_controllen < cmlen)
200 cmlen = msg->msg_controllen;
199 msg->msg_control += cmlen; 201 msg->msg_control += cmlen;
200 msg->msg_controllen -= cmlen; 202 msg->msg_controllen -= cmlen;
201 err = 0; 203 err = 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 573e17240197..98420f9c4b6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
52#endif 52#endif
53#include <linux/string.h> 53#include <linux/string.h>
54#include <linux/skbuff.h> 54#include <linux/skbuff.h>
55#include <linux/splice.h>
55#include <linux/cache.h> 56#include <linux/cache.h>
56#include <linux/rtnetlink.h> 57#include <linux/rtnetlink.h>
57#include <linux/init.h> 58#include <linux/init.h>
@@ -71,6 +72,40 @@
71static struct kmem_cache *skbuff_head_cache __read_mostly; 72static struct kmem_cache *skbuff_head_cache __read_mostly;
72static struct kmem_cache *skbuff_fclone_cache __read_mostly; 73static struct kmem_cache *skbuff_fclone_cache __read_mostly;
73 74
75static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
76 struct pipe_buffer *buf)
77{
78 struct sk_buff *skb = (struct sk_buff *) buf->private;
79
80 kfree_skb(skb);
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
86 struct sk_buff *skb = (struct sk_buff *) buf->private;
87
88 skb_get(skb);
89}
90
91static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
92 struct pipe_buffer *buf)
93{
94 return 1;
95}
96
97
98/* Pipe buffer operations for a socket. */
99static struct pipe_buf_operations sock_pipe_buf_ops = {
100 .can_merge = 0,
101 .map = generic_pipe_buf_map,
102 .unmap = generic_pipe_buf_unmap,
103 .confirm = generic_pipe_buf_confirm,
104 .release = sock_pipe_buf_release,
105 .steal = sock_pipe_buf_steal,
106 .get = sock_pipe_buf_get,
107};
108
74/* 109/*
75 * Keep out-of-line to prevent kernel bloat. 110 * Keep out-of-line to prevent kernel bloat.
76 * __builtin_return_address is not used because it is not always 111 * __builtin_return_address is not used because it is not always
@@ -275,12 +310,11 @@ static void skb_release_data(struct sk_buff *skb)
275/* 310/*
276 * Free an skbuff by memory without cleaning the state. 311 * Free an skbuff by memory without cleaning the state.
277 */ 312 */
278void kfree_skbmem(struct sk_buff *skb) 313static void kfree_skbmem(struct sk_buff *skb)
279{ 314{
280 struct sk_buff *other; 315 struct sk_buff *other;
281 atomic_t *fclone_ref; 316 atomic_t *fclone_ref;
282 317
283 skb_release_data(skb);
284 switch (skb->fclone) { 318 switch (skb->fclone) {
285 case SKB_FCLONE_UNAVAILABLE: 319 case SKB_FCLONE_UNAVAILABLE:
286 kmem_cache_free(skbuff_head_cache, skb); 320 kmem_cache_free(skbuff_head_cache, skb);
@@ -307,16 +341,8 @@ void kfree_skbmem(struct sk_buff *skb)
307 } 341 }
308} 342}
309 343
310/** 344/* Free everything but the sk_buff shell. */
311 * __kfree_skb - private function 345static void skb_release_all(struct sk_buff *skb)
312 * @skb: buffer
313 *
314 * Free an sk_buff. Release anything attached to the buffer.
315 * Clean the state. This is an internal helper function. Users should
316 * always call kfree_skb
317 */
318
319void __kfree_skb(struct sk_buff *skb)
320{ 346{
321 dst_release(skb->dst); 347 dst_release(skb->dst);
322#ifdef CONFIG_XFRM 348#ifdef CONFIG_XFRM
@@ -340,7 +366,21 @@ void __kfree_skb(struct sk_buff *skb)
340 skb->tc_verd = 0; 366 skb->tc_verd = 0;
341#endif 367#endif
342#endif 368#endif
369 skb_release_data(skb);
370}
371
372/**
373 * __kfree_skb - private function
374 * @skb: buffer
375 *
376 * Free an sk_buff. Release anything attached to the buffer.
377 * Clean the state. This is an internal helper function. Users should
378 * always call kfree_skb
379 */
343 380
381void __kfree_skb(struct sk_buff *skb)
382{
383 skb_release_all(skb);
344 kfree_skbmem(skb); 384 kfree_skbmem(skb);
345} 385}
346 386
@@ -411,16 +451,17 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
411 C(len); 451 C(len);
412 C(data_len); 452 C(data_len);
413 C(mac_len); 453 C(mac_len);
414 n->cloned = 1;
415 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; 454 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
455 n->cloned = 1;
416 n->nohdr = 0; 456 n->nohdr = 0;
417 n->destructor = NULL; 457 n->destructor = NULL;
418 C(truesize); 458 C(iif);
419 atomic_set(&n->users, 1);
420 C(head);
421 C(data);
422 C(tail); 459 C(tail);
423 C(end); 460 C(end);
461 C(head);
462 C(data);
463 C(truesize);
464 atomic_set(&n->users, 1);
424 465
425 atomic_inc(&(skb_shinfo(skb)->dataref)); 466 atomic_inc(&(skb_shinfo(skb)->dataref));
426 skb->cloned = 1; 467 skb->cloned = 1;
@@ -441,7 +482,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
441 */ 482 */
442struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) 483struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
443{ 484{
444 skb_release_data(dst); 485 skb_release_all(dst);
445 return __skb_clone(dst, src); 486 return __skb_clone(dst, src);
446} 487}
447EXPORT_SYMBOL_GPL(skb_morph); 488EXPORT_SYMBOL_GPL(skb_morph);
@@ -1116,6 +1157,217 @@ fault:
1116 return -EFAULT; 1157 return -EFAULT;
1117} 1158}
1118 1159
1160/*
1161 * Callback from splice_to_pipe(), if we need to release some pages
1162 * at the end of the spd in case we error'ed out in filling the pipe.
1163 */
1164static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1165{
1166 struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
1167
1168 kfree_skb(skb);
1169}
1170
1171/*
1172 * Fill page/offset/length into spd, if it can hold more pages.
1173 */
1174static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1175 unsigned int len, unsigned int offset,
1176 struct sk_buff *skb)
1177{
1178 if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1179 return 1;
1180
1181 spd->pages[spd->nr_pages] = page;
1182 spd->partial[spd->nr_pages].len = len;
1183 spd->partial[spd->nr_pages].offset = offset;
1184 spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
1185 spd->nr_pages++;
1186 return 0;
1187}
1188
1189/*
1190 * Map linear and fragment data from the skb to spd. Returns number of
1191 * pages mapped.
1192 */
1193static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1194 unsigned int *total_len,
1195 struct splice_pipe_desc *spd)
1196{
1197 unsigned int nr_pages = spd->nr_pages;
1198 unsigned int poff, plen, len, toff, tlen;
1199 int headlen, seg;
1200
1201 toff = *offset;
1202 tlen = *total_len;
1203 if (!tlen)
1204 goto err;
1205
1206 /*
1207 * if the offset is greater than the linear part, go directly to
1208 * the fragments.
1209 */
1210 headlen = skb_headlen(skb);
1211 if (toff >= headlen) {
1212 toff -= headlen;
1213 goto map_frag;
1214 }
1215
1216 /*
1217 * first map the linear region into the pages/partial map, skipping
1218 * any potential initial offset.
1219 */
1220 len = 0;
1221 while (len < headlen) {
1222 void *p = skb->data + len;
1223
1224 poff = (unsigned long) p & (PAGE_SIZE - 1);
1225 plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
1226 len += plen;
1227
1228 if (toff) {
1229 if (plen <= toff) {
1230 toff -= plen;
1231 continue;
1232 }
1233 plen -= toff;
1234 poff += toff;
1235 toff = 0;
1236 }
1237
1238 plen = min(plen, tlen);
1239 if (!plen)
1240 break;
1241
1242 /*
1243 * just jump directly to update and return, no point
1244 * in going over fragments when the output is full.
1245 */
1246 if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
1247 goto done;
1248
1249 tlen -= plen;
1250 }
1251
1252 /*
1253 * then map the fragments
1254 */
1255map_frag:
1256 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1257 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1258
1259 plen = f->size;
1260 poff = f->page_offset;
1261
1262 if (toff) {
1263 if (plen <= toff) {
1264 toff -= plen;
1265 continue;
1266 }
1267 plen -= toff;
1268 poff += toff;
1269 toff = 0;
1270 }
1271
1272 plen = min(plen, tlen);
1273 if (!plen)
1274 break;
1275
1276 if (spd_fill_page(spd, f->page, plen, poff, skb))
1277 break;
1278
1279 tlen -= plen;
1280 }
1281
1282done:
1283 if (spd->nr_pages - nr_pages) {
1284 *offset = 0;
1285 *total_len = tlen;
1286 return 0;
1287 }
1288err:
1289 return 1;
1290}
1291
1292/*
1293 * Map data from the skb to a pipe. Should handle both the linear part,
1294 * the fragments, and the frag list. It does NOT handle frag lists within
1295 * the frag list, if such a thing exists. We'd probably need to recurse to
1296 * handle that cleanly.
1297 */
1298int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
1299 struct pipe_inode_info *pipe, unsigned int tlen,
1300 unsigned int flags)
1301{
1302 struct partial_page partial[PIPE_BUFFERS];
1303 struct page *pages[PIPE_BUFFERS];
1304 struct splice_pipe_desc spd = {
1305 .pages = pages,
1306 .partial = partial,
1307 .flags = flags,
1308 .ops = &sock_pipe_buf_ops,
1309 .spd_release = sock_spd_release,
1310 };
1311 struct sk_buff *skb;
1312
1313 /*
1314 * I'd love to avoid the clone here, but tcp_read_sock()
1315 * ignores reference counts and unconditonally kills the sk_buff
1316 * on return from the actor.
1317 */
1318 skb = skb_clone(__skb, GFP_KERNEL);
1319 if (unlikely(!skb))
1320 return -ENOMEM;
1321
1322 /*
1323 * __skb_splice_bits() only fails if the output has no room left,
1324 * so no point in going over the frag_list for the error case.
1325 */
1326 if (__skb_splice_bits(skb, &offset, &tlen, &spd))
1327 goto done;
1328 else if (!tlen)
1329 goto done;
1330
1331 /*
1332 * now see if we have a frag_list to map
1333 */
1334 if (skb_shinfo(skb)->frag_list) {
1335 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1336
1337 for (; list && tlen; list = list->next) {
1338 if (__skb_splice_bits(list, &offset, &tlen, &spd))
1339 break;
1340 }
1341 }
1342
1343done:
1344 /*
1345 * drop our reference to the clone, the pipe consumption will
1346 * drop the rest.
1347 */
1348 kfree_skb(skb);
1349
1350 if (spd.nr_pages) {
1351 int ret;
1352
1353 /*
1354 * Drop the socket lock, otherwise we have reverse
1355 * locking dependencies between sk_lock and i_mutex
1356 * here as compared to sendfile(). We enter here
1357 * with the socket lock held, and splice_to_pipe() will
1358 * grab the pipe inode lock. For sendfile() emulation,
1359 * we call into ->sendpage() with the i_mutex lock held
1360 * and networking will grab the socket lock.
1361 */
1362 release_sock(__skb->sk);
1363 ret = splice_to_pipe(pipe, &spd);
1364 lock_sock(__skb->sk);
1365 return ret;
1366 }
1367
1368 return 0;
1369}
1370
1119/** 1371/**
1120 * skb_store_bits - store bits from kernel buffer to skb 1372 * skb_store_bits - store bits from kernel buffer to skb
1121 * @skb: destination buffer 1373 * @skb: destination buffer
@@ -2028,8 +2280,8 @@ void __init skb_init(void)
2028 * Fill the specified scatter-gather list with mappings/pointers into a 2280 * Fill the specified scatter-gather list with mappings/pointers into a
2029 * region of the buffer space attached to a socket buffer. 2281 * region of the buffer space attached to a socket buffer.
2030 */ 2282 */
2031int 2283static int
2032skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) 2284__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2033{ 2285{
2034 int start = skb_headlen(skb); 2286 int start = skb_headlen(skb);
2035 int i, copy = start - offset; 2287 int i, copy = start - offset;
@@ -2078,7 +2330,8 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2078 if ((copy = end - offset) > 0) { 2330 if ((copy = end - offset) > 0) {
2079 if (copy > len) 2331 if (copy > len)
2080 copy = len; 2332 copy = len;
2081 elt += skb_to_sgvec(list, sg+elt, offset - start, copy); 2333 elt += __skb_to_sgvec(list, sg+elt, offset - start,
2334 copy);
2082 if ((len -= copy) == 0) 2335 if ((len -= copy) == 0)
2083 return elt; 2336 return elt;
2084 offset += copy; 2337 offset += copy;
@@ -2090,6 +2343,15 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2090 return elt; 2343 return elt;
2091} 2344}
2092 2345
2346int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2347{
2348 int nsg = __skb_to_sgvec(skb, sg, offset, len);
2349
2350 sg_mark_end(&sg[nsg - 1]);
2351
2352 return nsg;
2353}
2354
2093/** 2355/**
2094 * skb_cow_data - Check that a socket buffer's data buffers are writable 2356 * skb_cow_data - Check that a socket buffer's data buffers are writable
2095 * @skb: The socket buffer to check. 2357 * @skb: The socket buffer to check.
diff --git a/net/core/sock.c b/net/core/sock.c
index bba9949681ff..433715fb141a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
154 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 154 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
155 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 155 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
156 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 156 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
157 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , 157 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
159 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" 159 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160}; 160};
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
168 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 168 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
169 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , 169 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
170 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 170 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
171 "slock-27" , "slock-28" , "slock-29" , 171 "slock-27" , "slock-28" , "slock-AF_CAN" ,
172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
173 "slock-AF_RXRPC" , "slock-AF_MAX" 173 "slock-AF_RXRPC" , "slock-AF_MAX"
174}; 174};
@@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
282 if (err) 282 if (err)
283 goto out; 283 goto out;
284 284
285 if (!sk_rmem_schedule(sk, skb->truesize)) {
286 err = -ENOBUFS;
287 goto out;
288 }
289
285 skb->dev = NULL; 290 skb->dev = NULL;
286 skb_set_owner_r(skb, sk); 291 skb_set_owner_r(skb, sk);
287 292
@@ -419,6 +424,14 @@ out:
419 return ret; 424 return ret;
420} 425}
421 426
427static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
428{
429 if (valbool)
430 sock_set_flag(sk, bit);
431 else
432 sock_reset_flag(sk, bit);
433}
434
422/* 435/*
423 * This is meant for all protocols to use and covers goings on 436 * This is meant for all protocols to use and covers goings on
424 * at the socket level. Everything here is generic. 437 * at the socket level. Everything here is generic.
@@ -463,11 +476,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
463 case SO_DEBUG: 476 case SO_DEBUG:
464 if (val && !capable(CAP_NET_ADMIN)) { 477 if (val && !capable(CAP_NET_ADMIN)) {
465 ret = -EACCES; 478 ret = -EACCES;
466 } 479 } else
467 else if (valbool) 480 sock_valbool_flag(sk, SOCK_DBG, valbool);
468 sock_set_flag(sk, SOCK_DBG);
469 else
470 sock_reset_flag(sk, SOCK_DBG);
471 break; 481 break;
472 case SO_REUSEADDR: 482 case SO_REUSEADDR:
473 sk->sk_reuse = valbool; 483 sk->sk_reuse = valbool;
@@ -477,10 +487,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
477 ret = -ENOPROTOOPT; 487 ret = -ENOPROTOOPT;
478 break; 488 break;
479 case SO_DONTROUTE: 489 case SO_DONTROUTE:
480 if (valbool) 490 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
481 sock_set_flag(sk, SOCK_LOCALROUTE);
482 else
483 sock_reset_flag(sk, SOCK_LOCALROUTE);
484 break; 491 break;
485 case SO_BROADCAST: 492 case SO_BROADCAST:
486 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 493 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -660,6 +667,13 @@ set_rcvbuf:
660 else 667 else
661 clear_bit(SOCK_PASSSEC, &sock->flags); 668 clear_bit(SOCK_PASSSEC, &sock->flags);
662 break; 669 break;
670 case SO_MARK:
671 if (!capable(CAP_NET_ADMIN))
672 ret = -EPERM;
673 else {
674 sk->sk_mark = val;
675 }
676 break;
663 677
664 /* We implement the SO_SNDLOWAT etc to 678 /* We implement the SO_SNDLOWAT etc to
665 not be settable (1003.1g 5.3) */ 679 not be settable (1003.1g 5.3) */
@@ -829,6 +843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
829 case SO_PEERSEC: 843 case SO_PEERSEC:
830 return security_socket_getpeersec_stream(sock, optval, optlen, len); 844 return security_socket_getpeersec_stream(sock, optval, optlen, len);
831 845
846 case SO_MARK:
847 v.val = sk->sk_mark;
848 break;
849
832 default: 850 default:
833 return -ENOPROTOOPT; 851 return -ENOPROTOOPT;
834 } 852 }
@@ -857,46 +875,43 @@ static inline void sock_lock_init(struct sock *sk)
857 af_family_keys + sk->sk_family); 875 af_family_keys + sk->sk_family);
858} 876}
859 877
860/** 878static void sock_copy(struct sock *nsk, const struct sock *osk)
861 * sk_alloc - All socket objects are allocated here 879{
862 * @net: the applicable net namespace 880#ifdef CONFIG_SECURITY_NETWORK
863 * @family: protocol family 881 void *sptr = nsk->sk_security;
864 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 882#endif
865 * @prot: struct proto associated with this new sock instance 883
866 * @zero_it: if we should zero the newly allocated sock 884 memcpy(nsk, osk, osk->sk_prot->obj_size);
867 */ 885#ifdef CONFIG_SECURITY_NETWORK
868struct sock *sk_alloc(struct net *net, int family, gfp_t priority, 886 nsk->sk_security = sptr;
869 struct proto *prot, int zero_it) 887 security_sk_clone(osk, nsk);
888#endif
889}
890
891static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
892 int family)
870{ 893{
871 struct sock *sk = NULL; 894 struct sock *sk;
872 struct kmem_cache *slab = prot->slab; 895 struct kmem_cache *slab;
873 896
897 slab = prot->slab;
874 if (slab != NULL) 898 if (slab != NULL)
875 sk = kmem_cache_alloc(slab, priority); 899 sk = kmem_cache_alloc(slab, priority);
876 else 900 else
877 sk = kmalloc(prot->obj_size, priority); 901 sk = kmalloc(prot->obj_size, priority);
878 902
879 if (sk) { 903 if (sk != NULL) {
880 if (zero_it) {
881 memset(sk, 0, prot->obj_size);
882 sk->sk_family = family;
883 /*
884 * See comment in struct sock definition to understand
885 * why we need sk_prot_creator -acme
886 */
887 sk->sk_prot = sk->sk_prot_creator = prot;
888 sock_lock_init(sk);
889 sk->sk_net = get_net(net);
890 }
891
892 if (security_sk_alloc(sk, family, priority)) 904 if (security_sk_alloc(sk, family, priority))
893 goto out_free; 905 goto out_free;
894 906
895 if (!try_module_get(prot->owner)) 907 if (!try_module_get(prot->owner))
896 goto out_free; 908 goto out_free_sec;
897 } 909 }
910
898 return sk; 911 return sk;
899 912
913out_free_sec:
914 security_sk_free(sk);
900out_free: 915out_free:
901 if (slab != NULL) 916 if (slab != NULL)
902 kmem_cache_free(slab, sk); 917 kmem_cache_free(slab, sk);
@@ -905,10 +920,53 @@ out_free:
905 return NULL; 920 return NULL;
906} 921}
907 922
923static void sk_prot_free(struct proto *prot, struct sock *sk)
924{
925 struct kmem_cache *slab;
926 struct module *owner;
927
928 owner = prot->owner;
929 slab = prot->slab;
930
931 security_sk_free(sk);
932 if (slab != NULL)
933 kmem_cache_free(slab, sk);
934 else
935 kfree(sk);
936 module_put(owner);
937}
938
939/**
940 * sk_alloc - All socket objects are allocated here
941 * @net: the applicable net namespace
942 * @family: protocol family
943 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
944 * @prot: struct proto associated with this new sock instance
945 * @zero_it: if we should zero the newly allocated sock
946 */
947struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
948 struct proto *prot)
949{
950 struct sock *sk;
951
952 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
953 if (sk) {
954 sk->sk_family = family;
955 /*
956 * See comment in struct sock definition to understand
957 * why we need sk_prot_creator -acme
958 */
959 sk->sk_prot = sk->sk_prot_creator = prot;
960 sock_lock_init(sk);
961 sk->sk_net = get_net(net);
962 }
963
964 return sk;
965}
966
908void sk_free(struct sock *sk) 967void sk_free(struct sock *sk)
909{ 968{
910 struct sk_filter *filter; 969 struct sk_filter *filter;
911 struct module *owner = sk->sk_prot_creator->owner;
912 970
913 if (sk->sk_destruct) 971 if (sk->sk_destruct)
914 sk->sk_destruct(sk); 972 sk->sk_destruct(sk);
@@ -925,25 +983,22 @@ void sk_free(struct sock *sk)
925 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 983 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
926 __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); 984 __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
927 985
928 security_sk_free(sk);
929 put_net(sk->sk_net); 986 put_net(sk->sk_net);
930 if (sk->sk_prot_creator->slab != NULL) 987 sk_prot_free(sk->sk_prot_creator, sk);
931 kmem_cache_free(sk->sk_prot_creator->slab, sk);
932 else
933 kfree(sk);
934 module_put(owner);
935} 988}
936 989
937struct sock *sk_clone(const struct sock *sk, const gfp_t priority) 990struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
938{ 991{
939 struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); 992 struct sock *newsk;
940 993
994 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
941 if (newsk != NULL) { 995 if (newsk != NULL) {
942 struct sk_filter *filter; 996 struct sk_filter *filter;
943 997
944 sock_copy(newsk, sk); 998 sock_copy(newsk, sk);
945 999
946 /* SANITY */ 1000 /* SANITY */
1001 get_net(newsk->sk_net);
947 sk_node_init(&newsk->sk_node); 1002 sk_node_init(&newsk->sk_node);
948 sock_lock_init(newsk); 1003 sock_lock_init(newsk);
949 bh_lock_sock(newsk); 1004 bh_lock_sock(newsk);
@@ -1068,7 +1123,9 @@ void sock_rfree(struct sk_buff *skb)
1068{ 1123{
1069 struct sock *sk = skb->sk; 1124 struct sock *sk = skb->sk;
1070 1125
1126 skb_truesize_check(skb);
1071 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1127 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1128 sk_mem_uncharge(skb->sk, skb->truesize);
1072} 1129}
1073 1130
1074 1131
@@ -1345,6 +1402,103 @@ int sk_wait_data(struct sock *sk, long *timeo)
1345 1402
1346EXPORT_SYMBOL(sk_wait_data); 1403EXPORT_SYMBOL(sk_wait_data);
1347 1404
1405/**
1406 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1407 * @sk: socket
1408 * @size: memory size to allocate
1409 * @kind: allocation type
1410 *
1411 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1412 * rmem allocation. This function assumes that protocols which have
1413 * memory_pressure use sk_wmem_queued as write buffer accounting.
1414 */
1415int __sk_mem_schedule(struct sock *sk, int size, int kind)
1416{
1417 struct proto *prot = sk->sk_prot;
1418 int amt = sk_mem_pages(size);
1419 int allocated;
1420
1421 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1422 allocated = atomic_add_return(amt, prot->memory_allocated);
1423
1424 /* Under limit. */
1425 if (allocated <= prot->sysctl_mem[0]) {
1426 if (prot->memory_pressure && *prot->memory_pressure)
1427 *prot->memory_pressure = 0;
1428 return 1;
1429 }
1430
1431 /* Under pressure. */
1432 if (allocated > prot->sysctl_mem[1])
1433 if (prot->enter_memory_pressure)
1434 prot->enter_memory_pressure();
1435
1436 /* Over hard limit. */
1437 if (allocated > prot->sysctl_mem[2])
1438 goto suppress_allocation;
1439
1440 /* guarantee minimum buffer size under pressure */
1441 if (kind == SK_MEM_RECV) {
1442 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1443 return 1;
1444 } else { /* SK_MEM_SEND */
1445 if (sk->sk_type == SOCK_STREAM) {
1446 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1447 return 1;
1448 } else if (atomic_read(&sk->sk_wmem_alloc) <
1449 prot->sysctl_wmem[0])
1450 return 1;
1451 }
1452
1453 if (prot->memory_pressure) {
1454 if (!*prot->memory_pressure ||
1455 prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
1456 sk_mem_pages(sk->sk_wmem_queued +
1457 atomic_read(&sk->sk_rmem_alloc) +
1458 sk->sk_forward_alloc))
1459 return 1;
1460 }
1461
1462suppress_allocation:
1463
1464 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1465 sk_stream_moderate_sndbuf(sk);
1466
1467 /* Fail only if socket is _under_ its sndbuf.
1468 * In this case we cannot block, so that we have to fail.
1469 */
1470 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1471 return 1;
1472 }
1473
1474 /* Alas. Undo changes. */
1475 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1476 atomic_sub(amt, prot->memory_allocated);
1477 return 0;
1478}
1479
1480EXPORT_SYMBOL(__sk_mem_schedule);
1481
1482/**
1483 * __sk_reclaim - reclaim memory_allocated
1484 * @sk: socket
1485 */
1486void __sk_mem_reclaim(struct sock *sk)
1487{
1488 struct proto *prot = sk->sk_prot;
1489
1490 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1491 prot->memory_allocated);
1492 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1493
1494 if (prot->memory_pressure && *prot->memory_pressure &&
1495 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1496 *prot->memory_pressure = 0;
1497}
1498
1499EXPORT_SYMBOL(__sk_mem_reclaim);
1500
1501
1348/* 1502/*
1349 * Set of default routines for initialising struct proto_ops when 1503 * Set of default routines for initialising struct proto_ops when
1350 * the protocol does not support a particular function. In certain 1504 * the protocol does not support a particular function. In certain
@@ -1459,7 +1613,7 @@ static void sock_def_error_report(struct sock *sk)
1459 read_lock(&sk->sk_callback_lock); 1613 read_lock(&sk->sk_callback_lock);
1460 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1614 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1461 wake_up_interruptible(sk->sk_sleep); 1615 wake_up_interruptible(sk->sk_sleep);
1462 sk_wake_async(sk,0,POLL_ERR); 1616 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1463 read_unlock(&sk->sk_callback_lock); 1617 read_unlock(&sk->sk_callback_lock);
1464} 1618}
1465 1619
@@ -1468,7 +1622,7 @@ static void sock_def_readable(struct sock *sk, int len)
1468 read_lock(&sk->sk_callback_lock); 1622 read_lock(&sk->sk_callback_lock);
1469 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1623 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1470 wake_up_interruptible(sk->sk_sleep); 1624 wake_up_interruptible(sk->sk_sleep);
1471 sk_wake_async(sk,1,POLL_IN); 1625 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1472 read_unlock(&sk->sk_callback_lock); 1626 read_unlock(&sk->sk_callback_lock);
1473} 1627}
1474 1628
@@ -1485,7 +1639,7 @@ static void sock_def_write_space(struct sock *sk)
1485 1639
1486 /* Should agree with poll, otherwise some programs break */ 1640 /* Should agree with poll, otherwise some programs break */
1487 if (sock_writeable(sk)) 1641 if (sock_writeable(sk))
1488 sk_wake_async(sk, 2, POLL_OUT); 1642 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1489 } 1643 }
1490 1644
1491 read_unlock(&sk->sk_callback_lock); 1645 read_unlock(&sk->sk_callback_lock);
@@ -1500,7 +1654,7 @@ void sk_send_sigurg(struct sock *sk)
1500{ 1654{
1501 if (sk->sk_socket && sk->sk_socket->file) 1655 if (sk->sk_socket && sk->sk_socket->file)
1502 if (send_sigurg(&sk->sk_socket->file->f_owner)) 1656 if (send_sigurg(&sk->sk_socket->file->f_owner))
1503 sk_wake_async(sk, 3, POLL_PRI); 1657 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1504} 1658}
1505 1659
1506void sk_reset_timer(struct sock *sk, struct timer_list* timer, 1660void sk_reset_timer(struct sock *sk, struct timer_list* timer,
@@ -1574,6 +1728,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1574 sk->sk_stamp = ktime_set(-1L, -1L); 1728 sk->sk_stamp = ktime_set(-1L, -1L);
1575 1729
1576 atomic_set(&sk->sk_refcnt, 1); 1730 atomic_set(&sk->sk_refcnt, 1);
1731 atomic_set(&sk->sk_drops, 0);
1577} 1732}
1578 1733
1579void fastcall lock_sock_nested(struct sock *sk, int subclass) 1734void fastcall lock_sock_nested(struct sock *sk, int subclass)
@@ -1768,7 +1923,11 @@ int proto_register(struct proto *prot, int alloc_slab)
1768{ 1923{
1769 char *request_sock_slab_name = NULL; 1924 char *request_sock_slab_name = NULL;
1770 char *timewait_sock_slab_name; 1925 char *timewait_sock_slab_name;
1771 int rc = -ENOBUFS; 1926
1927 if (sock_prot_inuse_init(prot) != 0) {
1928 printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
1929 goto out;
1930 }
1772 1931
1773 if (alloc_slab) { 1932 if (alloc_slab) {
1774 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 1933 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
@@ -1777,7 +1936,7 @@ int proto_register(struct proto *prot, int alloc_slab)
1777 if (prot->slab == NULL) { 1936 if (prot->slab == NULL) {
1778 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", 1937 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1779 prot->name); 1938 prot->name);
1780 goto out; 1939 goto out_free_inuse;
1781 } 1940 }
1782 1941
1783 if (prot->rsk_prot != NULL) { 1942 if (prot->rsk_prot != NULL) {
@@ -1821,9 +1980,8 @@ int proto_register(struct proto *prot, int alloc_slab)
1821 write_lock(&proto_list_lock); 1980 write_lock(&proto_list_lock);
1822 list_add(&prot->node, &proto_list); 1981 list_add(&prot->node, &proto_list);
1823 write_unlock(&proto_list_lock); 1982 write_unlock(&proto_list_lock);
1824 rc = 0; 1983 return 0;
1825out: 1984
1826 return rc;
1827out_free_timewait_sock_slab_name: 1985out_free_timewait_sock_slab_name:
1828 kfree(timewait_sock_slab_name); 1986 kfree(timewait_sock_slab_name);
1829out_free_request_sock_slab: 1987out_free_request_sock_slab:
@@ -1836,7 +1994,10 @@ out_free_request_sock_slab_name:
1836out_free_sock_slab: 1994out_free_sock_slab:
1837 kmem_cache_destroy(prot->slab); 1995 kmem_cache_destroy(prot->slab);
1838 prot->slab = NULL; 1996 prot->slab = NULL;
1839 goto out; 1997out_free_inuse:
1998 sock_prot_inuse_free(prot);
1999out:
2000 return -ENOBUFS;
1840} 2001}
1841 2002
1842EXPORT_SYMBOL(proto_register); 2003EXPORT_SYMBOL(proto_register);
@@ -1847,6 +2008,8 @@ void proto_unregister(struct proto *prot)
1847 list_del(&prot->node); 2008 list_del(&prot->node);
1848 write_unlock(&proto_list_lock); 2009 write_unlock(&proto_list_lock);
1849 2010
2011 sock_prot_inuse_free(prot);
2012
1850 if (prot->slab != NULL) { 2013 if (prot->slab != NULL) {
1851 kmem_cache_destroy(prot->slab); 2014 kmem_cache_destroy(prot->slab);
1852 prot->slab = NULL; 2015 prot->slab = NULL;
@@ -1873,6 +2036,7 @@ EXPORT_SYMBOL(proto_unregister);
1873 2036
1874#ifdef CONFIG_PROC_FS 2037#ifdef CONFIG_PROC_FS
1875static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 2038static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2039 __acquires(proto_list_lock)
1876{ 2040{
1877 read_lock(&proto_list_lock); 2041 read_lock(&proto_list_lock);
1878 return seq_list_start_head(&proto_list, *pos); 2042 return seq_list_start_head(&proto_list, *pos);
@@ -1884,6 +2048,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1884} 2048}
1885 2049
1886static void proto_seq_stop(struct seq_file *seq, void *v) 2050static void proto_seq_stop(struct seq_file *seq, void *v)
2051 __releases(proto_list_lock)
1887{ 2052{
1888 read_unlock(&proto_list_lock); 2053 read_unlock(&proto_list_lock);
1889} 2054}
@@ -2003,7 +2168,3 @@ EXPORT_SYMBOL(sock_wmalloc);
2003EXPORT_SYMBOL(sock_i_uid); 2168EXPORT_SYMBOL(sock_i_uid);
2004EXPORT_SYMBOL(sock_i_ino); 2169EXPORT_SYMBOL(sock_i_ino);
2005EXPORT_SYMBOL(sysctl_optmem_max); 2170EXPORT_SYMBOL(sysctl_optmem_max);
2006#ifdef CONFIG_SYSCTL
2007EXPORT_SYMBOL(sysctl_rmem_max);
2008EXPORT_SYMBOL(sysctl_wmem_max);
2009#endif
diff --git a/net/core/stream.c b/net/core/stream.c
index 755bacbcb321..4a0ad152c9c4 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk)
35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
36 wake_up_interruptible(sk->sk_sleep); 36 wake_up_interruptible(sk->sk_sleep);
37 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 37 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38 sock_wake_async(sock, 2, POLL_OUT); 38 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
39 } 39 }
40} 40}
41 41
@@ -172,17 +172,6 @@ do_interrupted:
172 172
173EXPORT_SYMBOL(sk_stream_wait_memory); 173EXPORT_SYMBOL(sk_stream_wait_memory);
174 174
175void sk_stream_rfree(struct sk_buff *skb)
176{
177 struct sock *sk = skb->sk;
178
179 skb_truesize_check(skb);
180 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
181 sk->sk_forward_alloc += skb->truesize;
182}
183
184EXPORT_SYMBOL(sk_stream_rfree);
185
186int sk_stream_error(struct sock *sk, int flags, int err) 175int sk_stream_error(struct sock *sk, int flags, int err)
187{ 176{
188 if (err == -EPIPE) 177 if (err == -EPIPE)
@@ -194,76 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
194 183
195EXPORT_SYMBOL(sk_stream_error); 184EXPORT_SYMBOL(sk_stream_error);
196 185
197void __sk_stream_mem_reclaim(struct sock *sk)
198{
199 atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
200 sk->sk_prot->memory_allocated);
201 sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
202 if (*sk->sk_prot->memory_pressure &&
203 (atomic_read(sk->sk_prot->memory_allocated) <
204 sk->sk_prot->sysctl_mem[0]))
205 *sk->sk_prot->memory_pressure = 0;
206}
207
208EXPORT_SYMBOL(__sk_stream_mem_reclaim);
209
210int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
211{
212 int amt = sk_stream_pages(size);
213
214 sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
215 atomic_add(amt, sk->sk_prot->memory_allocated);
216
217 /* Under limit. */
218 if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
219 if (*sk->sk_prot->memory_pressure)
220 *sk->sk_prot->memory_pressure = 0;
221 return 1;
222 }
223
224 /* Over hard limit. */
225 if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
226 sk->sk_prot->enter_memory_pressure();
227 goto suppress_allocation;
228 }
229
230 /* Under pressure. */
231 if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
232 sk->sk_prot->enter_memory_pressure();
233
234 if (kind) {
235 if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
236 return 1;
237 } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
238 return 1;
239
240 if (!*sk->sk_prot->memory_pressure ||
241 sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
242 sk_stream_pages(sk->sk_wmem_queued +
243 atomic_read(&sk->sk_rmem_alloc) +
244 sk->sk_forward_alloc))
245 return 1;
246
247suppress_allocation:
248
249 if (!kind) {
250 sk_stream_moderate_sndbuf(sk);
251
252 /* Fail only if socket is _under_ its sndbuf.
253 * In this case we cannot block, so that we have to fail.
254 */
255 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
256 return 1;
257 }
258
259 /* Alas. Undo changes. */
260 sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
261 atomic_sub(amt, sk->sk_prot->memory_allocated);
262 return 0;
263}
264
265EXPORT_SYMBOL(sk_stream_mem_schedule);
266
267void sk_stream_kill_queues(struct sock *sk) 186void sk_stream_kill_queues(struct sock *sk)
268{ 187{
269 /* First the read buffer. */ 188 /* First the read buffer. */
@@ -276,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk)
276 BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); 195 BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
277 196
278 /* Account for returned memory. */ 197 /* Account for returned memory. */
279 sk_stream_mem_reclaim(sk); 198 sk_mem_reclaim(sk);
280 199
281 BUG_TRAP(!sk->sk_wmem_queued); 200 BUG_TRAP(!sk->sk_wmem_queued);
282 BUG_TRAP(!sk->sk_forward_alloc); 201 BUG_TRAP(!sk->sk_forward_alloc);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 113cc728dc31..130338f83ae5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,12 +10,11 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/init.h>
13#include <net/sock.h> 14#include <net/sock.h>
14#include <net/xfrm.h> 15#include <net/xfrm.h>
15 16
16#ifdef CONFIG_SYSCTL 17static struct ctl_table net_core_table[] = {
17
18ctl_table core_table[] = {
19#ifdef CONFIG_NET 18#ifdef CONFIG_NET
20 { 19 {
21 .ctl_name = NET_CORE_WMEM_MAX, 20 .ctl_name = NET_CORE_WMEM_MAX,
@@ -128,7 +127,7 @@ ctl_table core_table[] = {
128 { 127 {
129 .ctl_name = NET_CORE_SOMAXCONN, 128 .ctl_name = NET_CORE_SOMAXCONN,
130 .procname = "somaxconn", 129 .procname = "somaxconn",
131 .data = &sysctl_somaxconn, 130 .data = &init_net.sysctl_somaxconn,
132 .maxlen = sizeof(int), 131 .maxlen = sizeof(int),
133 .mode = 0644, 132 .mode = 0644,
134 .proc_handler = &proc_dointvec 133 .proc_handler = &proc_dointvec
@@ -152,4 +151,65 @@ ctl_table core_table[] = {
152 { .ctl_name = 0 } 151 { .ctl_name = 0 }
153}; 152};
154 153
155#endif 154static __net_initdata struct ctl_path net_core_path[] = {
155 { .procname = "net", .ctl_name = CTL_NET, },
156 { .procname = "core", .ctl_name = NET_CORE, },
157 { },
158};
159
160static __net_init int sysctl_core_net_init(struct net *net)
161{
162 struct ctl_table *tbl, *tmp;
163
164 net->sysctl_somaxconn = SOMAXCONN;
165
166 tbl = net_core_table;
167 if (net != &init_net) {
168 tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
169 if (tbl == NULL)
170 goto err_dup;
171
172 for (tmp = tbl; tmp->procname; tmp++) {
173 if (tmp->data >= (void *)&init_net &&
174 tmp->data < (void *)(&init_net + 1))
175 tmp->data += (char *)net - (char *)&init_net;
176 else
177 tmp->mode &= ~0222;
178 }
179 }
180
181 net->sysctl_core_hdr = register_net_sysctl_table(net,
182 net_core_path, tbl);
183 if (net->sysctl_core_hdr == NULL)
184 goto err_reg;
185
186 return 0;
187
188err_reg:
189 if (tbl != net_core_table)
190 kfree(tbl);
191err_dup:
192 return -ENOMEM;
193}
194
195static __net_exit void sysctl_core_net_exit(struct net *net)
196{
197 struct ctl_table *tbl;
198
199 tbl = net->sysctl_core_hdr->ctl_table_arg;
200 unregister_net_sysctl_table(net->sysctl_core_hdr);
201 BUG_ON(tbl == net_core_table);
202 kfree(tbl);
203}
204
205static __net_initdata struct pernet_operations sysctl_core_ops = {
206 .init = sysctl_core_net_init,
207 .exit = sysctl_core_net_exit,
208};
209
210static __init int sysctl_core_init(void)
211{
212 return register_pernet_subsys(&sysctl_core_ops);
213}
214
215__initcall(sysctl_core_init);
diff --git a/net/core/utils.c b/net/core/utils.c
index 0bf17da40d52..8031eb59054e 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -91,17 +91,6 @@ EXPORT_SYMBOL(in_aton);
91#define IN6PTON_NULL 0x20000000 /* first/tail */ 91#define IN6PTON_NULL 0x20000000 /* first/tail */
92#define IN6PTON_UNKNOWN 0x40000000 92#define IN6PTON_UNKNOWN 0x40000000
93 93
94static inline int digit2bin(char c, int delim)
95{
96 if (c == delim || c == '\0')
97 return IN6PTON_DELIM;
98 if (c == '.')
99 return IN6PTON_DOT;
100 if (c >= '0' && c <= '9')
101 return (IN6PTON_DIGIT | (c - '0'));
102 return IN6PTON_UNKNOWN;
103}
104
105static inline int xdigit2bin(char c, int delim) 94static inline int xdigit2bin(char c, int delim)
106{ 95{
107 if (c == delim || c == '\0') 96 if (c == delim || c == '\0')
@@ -293,3 +282,19 @@ out:
293} 282}
294 283
295EXPORT_SYMBOL(in6_pton); 284EXPORT_SYMBOL(in6_pton);
285
286void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
287 __be32 from, __be32 to, int pseudohdr)
288{
289 __be32 diff[] = { ~from, to };
290 if (skb->ip_summed != CHECKSUM_PARTIAL) {
291 *sum = csum_fold(csum_partial(diff, sizeof(diff),
292 ~csum_unfold(*sum)));
293 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
294 skb->csum = ~csum_partial(diff, sizeof(diff),
295 ~skb->csum);
296 } else if (pseudohdr)
297 *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
298 csum_unfold(*sum)));
299}
300EXPORT_SYMBOL(inet_proto_csum_replace4);
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 0549e4719b13..7aa2a7acc7ec 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,6 +1,7 @@
1menuconfig IP_DCCP 1menuconfig IP_DCCP
2 tristate "The DCCP Protocol (EXPERIMENTAL)" 2 tristate "The DCCP Protocol (EXPERIMENTAL)"
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select IP_DCCP_CCID2
4 ---help--- 5 ---help---
5 Datagram Congestion Control Protocol (RFC 4340) 6 Datagram Congestion Control Protocol (RFC 4340)
6 7
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 83378f379f72..6de4bd195d28 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -30,7 +30,7 @@ static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
30 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); 30 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
31 31
32 if (avr != NULL) 32 if (avr != NULL)
33 INIT_LIST_HEAD(&avr->dccpavr_node); 33 INIT_LIST_HEAD(&avr->avr_node);
34 34
35 return avr; 35 return avr;
36} 36}
@@ -40,7 +40,7 @@ static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
40 if (unlikely(avr == NULL)) 40 if (unlikely(avr == NULL))
41 return; 41 return;
42 /* Check if deleting a linked record */ 42 /* Check if deleting a linked record */
43 WARN_ON(!list_empty(&avr->dccpavr_node)); 43 WARN_ON(!list_empty(&avr->avr_node));
44 kmem_cache_free(dccp_ackvec_record_slab, avr); 44 kmem_cache_free(dccp_ackvec_record_slab, avr);
45} 45}
46 46
@@ -52,16 +52,15 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
52 * just add the AVR at the head of the list. 52 * just add the AVR at the head of the list.
53 * -sorbo. 53 * -sorbo.
54 */ 54 */
55 if (!list_empty(&av->dccpav_records)) { 55 if (!list_empty(&av->av_records)) {
56 const struct dccp_ackvec_record *head = 56 const struct dccp_ackvec_record *head =
57 list_entry(av->dccpav_records.next, 57 list_entry(av->av_records.next,
58 struct dccp_ackvec_record, 58 struct dccp_ackvec_record,
59 dccpavr_node); 59 avr_node);
60 BUG_ON(before48(avr->dccpavr_ack_seqno, 60 BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
61 head->dccpavr_ack_seqno));
62 } 61 }
63 62
64 list_add(&avr->dccpavr_node, &av->dccpav_records); 63 list_add(&avr->avr_node, &av->av_records);
65} 64}
66 65
67int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) 66int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
@@ -69,9 +68,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
69 struct dccp_sock *dp = dccp_sk(sk); 68 struct dccp_sock *dp = dccp_sk(sk);
70 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; 69 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
71 /* Figure out how many options do we need to represent the ackvec */ 70 /* Figure out how many options do we need to represent the ackvec */
72 const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, 71 const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
73 DCCP_MAX_ACKVEC_OPT_LEN); 72 u16 len = av->av_vec_len + 2 * nr_opts, i;
74 u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
75 u32 elapsed_time; 73 u32 elapsed_time;
76 const unsigned char *tail, *from; 74 const unsigned char *tail, *from;
77 unsigned char *to; 75 unsigned char *to;
@@ -81,7 +79,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
81 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) 79 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
82 return -1; 80 return -1;
83 81
84 delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); 82 delta = ktime_us_delta(ktime_get_real(), av->av_time);
85 elapsed_time = delta / 10; 83 elapsed_time = delta / 10;
86 84
87 if (elapsed_time != 0 && 85 if (elapsed_time != 0 &&
@@ -95,9 +93,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
95 DCCP_SKB_CB(skb)->dccpd_opt_len += len; 93 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
96 94
97 to = skb_push(skb, len); 95 to = skb_push(skb, len);
98 len = av->dccpav_vec_len; 96 len = av->av_vec_len;
99 from = av->dccpav_buf + av->dccpav_buf_head; 97 from = av->av_buf + av->av_buf_head;
100 tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN; 98 tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
101 99
102 for (i = 0; i < nr_opts; ++i) { 100 for (i = 0; i < nr_opts; ++i) {
103 int copylen = len; 101 int copylen = len;
@@ -116,7 +114,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
116 to += tailsize; 114 to += tailsize;
117 len -= tailsize; 115 len -= tailsize;
118 copylen -= tailsize; 116 copylen -= tailsize;
119 from = av->dccpav_buf; 117 from = av->av_buf;
120 } 118 }
121 119
122 memcpy(to, from, copylen); 120 memcpy(to, from, copylen);
@@ -134,19 +132,19 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
134 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will 132 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
135 * equal buf_nonce. 133 * equal buf_nonce.
136 */ 134 */
137 avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 135 avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
138 avr->dccpavr_ack_ptr = av->dccpav_buf_head; 136 avr->avr_ack_ptr = av->av_buf_head;
139 avr->dccpavr_ack_ackno = av->dccpav_buf_ackno; 137 avr->avr_ack_ackno = av->av_buf_ackno;
140 avr->dccpavr_ack_nonce = av->dccpav_buf_nonce; 138 avr->avr_ack_nonce = av->av_buf_nonce;
141 avr->dccpavr_sent_len = av->dccpav_vec_len; 139 avr->avr_sent_len = av->av_vec_len;
142 140
143 dccp_ackvec_insert_avr(av, avr); 141 dccp_ackvec_insert_avr(av, avr);
144 142
145 dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " 143 dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
146 "ack_ackno=%llu\n", 144 "ack_ackno=%llu\n",
147 dccp_role(sk), avr->dccpavr_sent_len, 145 dccp_role(sk), avr->avr_sent_len,
148 (unsigned long long)avr->dccpavr_ack_seqno, 146 (unsigned long long)avr->avr_ack_seqno,
149 (unsigned long long)avr->dccpavr_ack_ackno); 147 (unsigned long long)avr->avr_ack_ackno);
150 return 0; 148 return 0;
151} 149}
152 150
@@ -155,12 +153,12 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
155 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); 153 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
156 154
157 if (av != NULL) { 155 if (av != NULL) {
158 av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; 156 av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
159 av->dccpav_buf_ackno = UINT48_MAX + 1; 157 av->av_buf_ackno = UINT48_MAX + 1;
160 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; 158 av->av_buf_nonce = 0;
161 av->dccpav_time = ktime_set(0, 0); 159 av->av_time = ktime_set(0, 0);
162 av->dccpav_vec_len = 0; 160 av->av_vec_len = 0;
163 INIT_LIST_HEAD(&av->dccpav_records); 161 INIT_LIST_HEAD(&av->av_records);
164 } 162 }
165 163
166 return av; 164 return av;
@@ -171,12 +169,11 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
171 if (unlikely(av == NULL)) 169 if (unlikely(av == NULL))
172 return; 170 return;
173 171
174 if (!list_empty(&av->dccpav_records)) { 172 if (!list_empty(&av->av_records)) {
175 struct dccp_ackvec_record *avr, *next; 173 struct dccp_ackvec_record *avr, *next;
176 174
177 list_for_each_entry_safe(avr, next, &av->dccpav_records, 175 list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
178 dccpavr_node) { 176 list_del_init(&avr->avr_node);
179 list_del_init(&avr->dccpavr_node);
180 dccp_ackvec_record_delete(avr); 177 dccp_ackvec_record_delete(avr);
181 } 178 }
182 } 179 }
@@ -187,13 +184,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
187static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, 184static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
188 const u32 index) 185 const u32 index)
189{ 186{
190 return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; 187 return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
191} 188}
192 189
193static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, 190static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
194 const u32 index) 191 const u32 index)
195{ 192{
196 return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; 193 return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
197} 194}
198 195
199/* 196/*
@@ -208,29 +205,29 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
208 unsigned int gap; 205 unsigned int gap;
209 long new_head; 206 long new_head;
210 207
211 if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN) 208 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
212 return -ENOBUFS; 209 return -ENOBUFS;
213 210
214 gap = packets - 1; 211 gap = packets - 1;
215 new_head = av->dccpav_buf_head - packets; 212 new_head = av->av_buf_head - packets;
216 213
217 if (new_head < 0) { 214 if (new_head < 0) {
218 if (gap > 0) { 215 if (gap > 0) {
219 memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, 216 memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
220 gap + new_head + 1); 217 gap + new_head + 1);
221 gap = -new_head; 218 gap = -new_head;
222 } 219 }
223 new_head += DCCP_MAX_ACKVEC_LEN; 220 new_head += DCCP_MAX_ACKVEC_LEN;
224 } 221 }
225 222
226 av->dccpav_buf_head = new_head; 223 av->av_buf_head = new_head;
227 224
228 if (gap > 0) 225 if (gap > 0)
229 memset(av->dccpav_buf + av->dccpav_buf_head + 1, 226 memset(av->av_buf + av->av_buf_head + 1,
230 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); 227 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
231 228
232 av->dccpav_buf[av->dccpav_buf_head] = state; 229 av->av_buf[av->av_buf_head] = state;
233 av->dccpav_vec_len += packets; 230 av->av_vec_len += packets;
234 return 0; 231 return 0;
235} 232}
236 233
@@ -243,7 +240,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
243 /* 240 /*
244 * Check at the right places if the buffer is full, if it is, tell the 241 * Check at the right places if the buffer is full, if it is, tell the
245 * caller to start dropping packets till the HC-Sender acks our ACK 242 * caller to start dropping packets till the HC-Sender acks our ACK
246 * vectors, when we will free up space in dccpav_buf. 243 * vectors, when we will free up space in av_buf.
247 * 244 *
248 * We may well decide to do buffer compression, etc, but for now lets 245 * We may well decide to do buffer compression, etc, but for now lets
249 * just drop. 246 * just drop.
@@ -263,22 +260,20 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
263 */ 260 */
264 261
265 /* See if this is the first ackno being inserted */ 262 /* See if this is the first ackno being inserted */
266 if (av->dccpav_vec_len == 0) { 263 if (av->av_vec_len == 0) {
267 av->dccpav_buf[av->dccpav_buf_head] = state; 264 av->av_buf[av->av_buf_head] = state;
268 av->dccpav_vec_len = 1; 265 av->av_vec_len = 1;
269 } else if (after48(ackno, av->dccpav_buf_ackno)) { 266 } else if (after48(ackno, av->av_buf_ackno)) {
270 const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, 267 const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
271 ackno);
272 268
273 /* 269 /*
274 * Look if the state of this packet is the same as the 270 * Look if the state of this packet is the same as the
275 * previous ackno and if so if we can bump the head len. 271 * previous ackno and if so if we can bump the head len.
276 */ 272 */
277 if (delta == 1 && 273 if (delta == 1 &&
278 dccp_ackvec_state(av, av->dccpav_buf_head) == state && 274 dccp_ackvec_state(av, av->av_buf_head) == state &&
279 (dccp_ackvec_len(av, av->dccpav_buf_head) < 275 dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
280 DCCP_ACKVEC_LEN_MASK)) 276 av->av_buf[av->av_buf_head]++;
281 av->dccpav_buf[av->dccpav_buf_head]++;
282 else if (dccp_ackvec_set_buf_head_state(av, delta, state)) 277 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
283 return -ENOBUFS; 278 return -ENOBUFS;
284 } else { 279 } else {
@@ -290,14 +285,14 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
290 * the byte corresponding to S. (Indexing structures 285 * the byte corresponding to S. (Indexing structures
291 * could reduce the complexity of this scan.) 286 * could reduce the complexity of this scan.)
292 */ 287 */
293 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); 288 u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
294 u32 index = av->dccpav_buf_head; 289 u32 index = av->av_buf_head;
295 290
296 while (1) { 291 while (1) {
297 const u8 len = dccp_ackvec_len(av, index); 292 const u8 len = dccp_ackvec_len(av, index);
298 const u8 state = dccp_ackvec_state(av, index); 293 const u8 state = dccp_ackvec_state(av, index);
299 /* 294 /*
300 * valid packets not yet in dccpav_buf have a reserved 295 * valid packets not yet in av_buf have a reserved
301 * entry, with a len equal to 0. 296 * entry, with a len equal to 0.
302 */ 297 */
303 if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && 298 if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
@@ -305,7 +300,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
305 reserved seat! */ 300 reserved seat! */
306 dccp_pr_debug("Found %llu reserved seat!\n", 301 dccp_pr_debug("Found %llu reserved seat!\n",
307 (unsigned long long)ackno); 302 (unsigned long long)ackno);
308 av->dccpav_buf[index] = state; 303 av->av_buf[index] = state;
309 goto out; 304 goto out;
310 } 305 }
311 /* len == 0 means one packet */ 306 /* len == 0 means one packet */
@@ -318,8 +313,8 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
318 } 313 }
319 } 314 }
320 315
321 av->dccpav_buf_ackno = ackno; 316 av->av_buf_ackno = ackno;
322 av->dccpav_time = ktime_get_real(); 317 av->av_time = ktime_get_real();
323out: 318out:
324 return 0; 319 return 0;
325 320
@@ -349,9 +344,9 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
349 344
350void dccp_ackvec_print(const struct dccp_ackvec *av) 345void dccp_ackvec_print(const struct dccp_ackvec *av)
351{ 346{
352 dccp_ackvector_print(av->dccpav_buf_ackno, 347 dccp_ackvector_print(av->av_buf_ackno,
353 av->dccpav_buf + av->dccpav_buf_head, 348 av->av_buf + av->av_buf_head,
354 av->dccpav_vec_len); 349 av->av_vec_len);
355} 350}
356#endif 351#endif
357 352
@@ -361,17 +356,15 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
361 struct dccp_ackvec_record *next; 356 struct dccp_ackvec_record *next;
362 357
363 /* sort out vector length */ 358 /* sort out vector length */
364 if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) 359 if (av->av_buf_head <= avr->avr_ack_ptr)
365 av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; 360 av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
366 else 361 else
367 av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 362 av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
368 - av->dccpav_buf_head 363 av->av_buf_head + avr->avr_ack_ptr;
369 + avr->dccpavr_ack_ptr;
370 364
371 /* free records */ 365 /* free records */
372 list_for_each_entry_safe_from(avr, next, &av->dccpav_records, 366 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
373 dccpavr_node) { 367 list_del_init(&avr->avr_node);
374 list_del_init(&avr->dccpavr_node);
375 dccp_ackvec_record_delete(avr); 368 dccp_ackvec_record_delete(avr);
376 } 369 }
377} 370}
@@ -386,16 +379,16 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
386 * windows. We will be receiving ACKs for stuff we sent a while back 379 * windows. We will be receiving ACKs for stuff we sent a while back
387 * -sorbo. 380 * -sorbo.
388 */ 381 */
389 list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) { 382 list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
390 if (ackno == avr->dccpavr_ack_seqno) { 383 if (ackno == avr->avr_ack_seqno) {
391 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " 384 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
392 "ack_ackno=%llu, ACKED!\n", 385 "ack_ackno=%llu, ACKED!\n",
393 dccp_role(sk), 1, 386 dccp_role(sk), 1,
394 (unsigned long long)avr->dccpavr_ack_seqno, 387 (unsigned long long)avr->avr_ack_seqno,
395 (unsigned long long)avr->dccpavr_ack_ackno); 388 (unsigned long long)avr->avr_ack_ackno);
396 dccp_ackvec_throw_record(av, avr); 389 dccp_ackvec_throw_record(av, avr);
397 break; 390 break;
398 } else if (avr->dccpavr_ack_seqno > ackno) 391 } else if (avr->avr_ack_seqno > ackno)
399 break; /* old news */ 392 break; /* old news */
400 } 393 }
401} 394}
@@ -409,7 +402,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
409 struct dccp_ackvec_record *avr; 402 struct dccp_ackvec_record *avr;
410 403
411 /* Check if we actually sent an ACK vector */ 404 /* Check if we actually sent an ACK vector */
412 if (list_empty(&av->dccpav_records)) 405 if (list_empty(&av->av_records))
413 return; 406 return;
414 407
415 i = len; 408 i = len;
@@ -418,8 +411,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
418 * I think it might be more efficient to work backwards. See comment on 411 * I think it might be more efficient to work backwards. See comment on
419 * rcv_ackno. -sorbo. 412 * rcv_ackno. -sorbo.
420 */ 413 */
421 avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record, 414 avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
422 dccpavr_node);
423 while (i--) { 415 while (i--) {
424 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 416 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
425 u64 ackno_end_rl; 417 u64 ackno_end_rl;
@@ -430,15 +422,14 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
430 * If our AVR sequence number is greater than the ack, go 422 * If our AVR sequence number is greater than the ack, go
431 * forward in the AVR list until it is not so. 423 * forward in the AVR list until it is not so.
432 */ 424 */
433 list_for_each_entry_from(avr, &av->dccpav_records, 425 list_for_each_entry_from(avr, &av->av_records, avr_node) {
434 dccpavr_node) { 426 if (!after48(avr->avr_ack_seqno, *ackno))
435 if (!after48(avr->dccpavr_ack_seqno, *ackno))
436 goto found; 427 goto found;
437 } 428 }
438 /* End of the dccpav_records list, not found, exit */ 429 /* End of the av_records list, not found, exit */
439 break; 430 break;
440found: 431found:
441 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) { 432 if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
442 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; 433 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
443 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { 434 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
444 dccp_pr_debug("%s ACK vector 0, len=%d, " 435 dccp_pr_debug("%s ACK vector 0, len=%d, "
@@ -446,9 +437,9 @@ found:
446 "ACKED!\n", 437 "ACKED!\n",
447 dccp_role(sk), len, 438 dccp_role(sk), len,
448 (unsigned long long) 439 (unsigned long long)
449 avr->dccpavr_ack_seqno, 440 avr->avr_ack_seqno,
450 (unsigned long long) 441 (unsigned long long)
451 avr->dccpavr_ack_ackno); 442 avr->avr_ack_ackno);
452 dccp_ackvec_throw_record(av, avr); 443 dccp_ackvec_throw_record(av, avr);
453 break; 444 break;
454 } 445 }
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 9ef0737043ee..bcb64fb4acef 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -32,54 +32,54 @@
32 * 32 *
33 * This data structure is the one defined in RFC 4340, Appendix A. 33 * This data structure is the one defined in RFC 4340, Appendix A.
34 * 34 *
35 * @dccpav_buf_head - circular buffer head 35 * @av_buf_head - circular buffer head
36 * @dccpav_buf_tail - circular buffer tail 36 * @av_buf_tail - circular buffer tail
37 * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the 37 * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
38 * buffer (i.e. %dccpav_buf_head) 38 * buffer (i.e. %av_buf_head)
39 * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked 39 * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
40 * by the buffer with State 0 40 * by the buffer with State 0
41 * 41 *
42 * Additionally, the HC-Receiver must keep some information about the 42 * Additionally, the HC-Receiver must keep some information about the
43 * Ack Vectors it has recently sent. For each packet sent carrying an 43 * Ack Vectors it has recently sent. For each packet sent carrying an
44 * Ack Vector, it remembers four variables: 44 * Ack Vector, it remembers four variables:
45 * 45 *
46 * @dccpav_records - list of dccp_ackvec_record 46 * @av_records - list of dccp_ackvec_record
47 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 47 * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
48 * 48 *
49 * @dccpav_time - the time in usecs 49 * @av_time - the time in usecs
50 * @dccpav_buf - circular buffer of acknowledgeable packets 50 * @av_buf - circular buffer of acknowledgeable packets
51 */ 51 */
52struct dccp_ackvec { 52struct dccp_ackvec {
53 u64 dccpav_buf_ackno; 53 u64 av_buf_ackno;
54 struct list_head dccpav_records; 54 struct list_head av_records;
55 ktime_t dccpav_time; 55 ktime_t av_time;
56 u16 dccpav_buf_head; 56 u16 av_buf_head;
57 u16 dccpav_vec_len; 57 u16 av_vec_len;
58 u8 dccpav_buf_nonce; 58 u8 av_buf_nonce;
59 u8 dccpav_ack_nonce; 59 u8 av_ack_nonce;
60 u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; 60 u8 av_buf[DCCP_MAX_ACKVEC_LEN];
61}; 61};
62 62
63/** struct dccp_ackvec_record - ack vector record 63/** struct dccp_ackvec_record - ack vector record
64 * 64 *
65 * ACK vector record as defined in Appendix A of spec. 65 * ACK vector record as defined in Appendix A of spec.
66 * 66 *
67 * The list is sorted by dccpavr_ack_seqno 67 * The list is sorted by avr_ack_seqno
68 * 68 *
69 * @dccpavr_node - node in dccpav_records 69 * @avr_node - node in av_records
70 * @dccpavr_ack_seqno - sequence number of the packet this record was sent on 70 * @avr_ack_seqno - sequence number of the packet this record was sent on
71 * @dccpavr_ack_ackno - sequence number being acknowledged 71 * @avr_ack_ackno - sequence number being acknowledged
72 * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts 72 * @avr_ack_ptr - pointer into av_buf where this record starts
73 * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent 73 * @avr_ack_nonce - av_ack_nonce at the time this record was sent
74 * @dccpavr_sent_len - lenght of the record in dccpav_buf 74 * @avr_sent_len - lenght of the record in av_buf
75 */ 75 */
76struct dccp_ackvec_record { 76struct dccp_ackvec_record {
77 struct list_head dccpavr_node; 77 struct list_head avr_node;
78 u64 dccpavr_ack_seqno; 78 u64 avr_ack_seqno;
79 u64 dccpavr_ack_ackno; 79 u64 avr_ack_ackno;
80 u16 dccpavr_ack_ptr; 80 u16 avr_ack_ptr;
81 u16 dccpavr_sent_len; 81 u16 avr_sent_len;
82 u8 dccpavr_ack_nonce; 82 u8 avr_ack_nonce;
83}; 83};
84 84
85struct sock; 85struct sock;
@@ -105,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
105 105
106static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) 106static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
107{ 107{
108 return av->dccpav_vec_len; 108 return av->av_vec_len;
109} 109}
110#else /* CONFIG_IP_DCCP_ACKVEC */ 110#else /* CONFIG_IP_DCCP_ACKVEC */
111static inline int dccp_ackvec_init(void) 111static inline int dccp_ackvec_init(void)
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index c45088b5e6fb..4809753d12ae 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -92,15 +92,15 @@ int ccid_register(struct ccid_operations *ccid_ops)
92 92
93 ccid_ops->ccid_hc_rx_slab = 93 ccid_ops->ccid_hc_rx_slab =
94 ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, 94 ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
95 "%s_hc_rx_sock", 95 "ccid%u_hc_rx_sock",
96 ccid_ops->ccid_name); 96 ccid_ops->ccid_id);
97 if (ccid_ops->ccid_hc_rx_slab == NULL) 97 if (ccid_ops->ccid_hc_rx_slab == NULL)
98 goto out; 98 goto out;
99 99
100 ccid_ops->ccid_hc_tx_slab = 100 ccid_ops->ccid_hc_tx_slab =
101 ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, 101 ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
102 "%s_hc_tx_sock", 102 "ccid%u_hc_tx_sock",
103 ccid_ops->ccid_name); 103 ccid_ops->ccid_id);
104 if (ccid_ops->ccid_hc_tx_slab == NULL) 104 if (ccid_ops->ccid_hc_tx_slab == NULL)
105 goto out_free_rx_slab; 105 goto out_free_rx_slab;
106 106
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c65cb2453e43..fdeae7b57319 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -23,14 +23,37 @@
23 23
24struct tcp_info; 24struct tcp_info;
25 25
26/**
27 * struct ccid_operations - Interface to Congestion-Control Infrastructure
28 *
29 * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
30 * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
31 * @ccid_name: alphabetical identifier string for @ccid_id
32 * @ccid_owner: module which implements/owns this CCID
33 * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
34 * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
35 *
36 * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup)
37 * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction)
38 * @ccid_hc_rx_packet_recv: implements the HC-receiver side
39 * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options
40 * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options
41 * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender
42 * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender
43 * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender
44 * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender
45 * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender
46 */
26struct ccid_operations { 47struct ccid_operations {
27 unsigned char ccid_id; 48 unsigned char ccid_id;
28 const char *ccid_name; 49 __u32 ccid_ccmps;
29 struct module *ccid_owner; 50 const char *ccid_name;
30 struct kmem_cache *ccid_hc_rx_slab; 51 struct module *ccid_owner;
31 __u32 ccid_hc_rx_obj_size; 52 struct kmem_cache *ccid_hc_rx_slab,
32 struct kmem_cache *ccid_hc_tx_slab; 53 *ccid_hc_tx_slab;
33 __u32 ccid_hc_tx_obj_size; 54 __u32 ccid_hc_rx_obj_size,
55 ccid_hc_tx_obj_size;
56 /* Interface Routines */
34 int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); 57 int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
35 int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); 58 int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
36 void (*ccid_hc_rx_exit)(struct sock *sk); 59 void (*ccid_hc_rx_exit)(struct sock *sk);
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 80f469887691..12275943eab8 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,9 +1,8 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)" 1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL 2 depends on EXPERIMENTAL
3 3
4config IP_DCCP_CCID2 4config IP_DCCP_CCID2
5 tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" 5 tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 def_tristate IP_DCCP 6 def_tristate IP_DCCP
8 select IP_DCCP_ACKVEC 7 select IP_DCCP_ACKVEC
9 ---help--- 8 ---help---
@@ -20,18 +19,9 @@ config IP_DCCP_CCID2
20 to the user. For example, a hypothetical application that 19 to the user. For example, a hypothetical application that
21 transferred files over DCCP, using application-level retransmissions 20 transferred files over DCCP, using application-level retransmissions
22 for lost packets, would prefer CCID 2 to CCID 3. On-line games may 21 for lost packets, would prefer CCID 2 to CCID 3. On-line games may
23 also prefer CCID 2. 22 also prefer CCID 2. See RFC 4341 for further details.
24 23
25 CCID 2 is further described in RFC 4341, 24 CCID2 is the default CCID used by DCCP.
26 http://www.ietf.org/rfc/rfc4341.txt
27
28 This text was extracted from RFC 4340 (sec. 10.1),
29 http://www.ietf.org/rfc/rfc4340.txt
30
31 To compile this CCID as a module, choose M here: the module will be
32 called dccp_ccid2.
33
34 If in doubt, say M.
35 25
36config IP_DCCP_CCID2_DEBUG 26config IP_DCCP_CCID2_DEBUG
37 bool "CCID2 debugging messages" 27 bool "CCID2 debugging messages"
@@ -47,8 +37,8 @@ config IP_DCCP_CCID2_DEBUG
47 37
48config IP_DCCP_CCID3 38config IP_DCCP_CCID3
49 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" 39 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
50 depends on IP_DCCP
51 def_tristate IP_DCCP 40 def_tristate IP_DCCP
41 select IP_DCCP_TFRC_LIB
52 ---help--- 42 ---help---
53 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based 43 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
54 rate-controlled congestion control mechanism. TFRC is designed to 44 rate-controlled congestion control mechanism. TFRC is designed to
@@ -74,10 +64,6 @@ config IP_DCCP_CCID3
74 64
75 If in doubt, say M. 65 If in doubt, say M.
76 66
77config IP_DCCP_TFRC_LIB
78 depends on IP_DCCP_CCID3
79 def_tristate IP_DCCP_CCID3
80
81config IP_DCCP_CCID3_DEBUG 67config IP_DCCP_CCID3_DEBUG
82 bool "CCID3 debugging messages" 68 bool "CCID3 debugging messages"
83 depends on IP_DCCP_CCID3 69 depends on IP_DCCP_CCID3
@@ -121,5 +107,13 @@ config IP_DCCP_CCID3_RTO
121 is serious network congestion: experimenting with larger values should 107 is serious network congestion: experimenting with larger values should
122 therefore not be performed on WANs. 108 therefore not be performed on WANs.
123 109
110config IP_DCCP_TFRC_LIB
111 tristate
112 default n
113
114config IP_DCCP_TFRC_DEBUG
115 bool
116 depends on IP_DCCP_TFRC_LIB
117 default y if IP_DCCP_CCID3_DEBUG
124 118
125endmenu 119endmenu
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d694656b8800..b5b52ebb2693 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -24,9 +24,6 @@
24 24
25/* 25/*
26 * This implementation should follow RFC 4341 26 * This implementation should follow RFC 4341
27 *
28 * BUGS:
29 * - sequence number wrapping
30 */ 27 */
31 28
32#include "../ccid.h" 29#include "../ccid.h"
@@ -129,50 +126,35 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
129{ 126{
130 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 127 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
131 128
132 ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, 129 if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
133 hctx->ccid2hctx_cwnd); 130 return 0;
134
135 if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
136 /* OK we can send... make sure previous packet was sent off */
137 if (!hctx->ccid2hctx_sendwait) {
138 hctx->ccid2hctx_sendwait = 1;
139 return 0;
140 }
141 }
142 131
143 return 1; /* XXX CCID should dequeue when ready instead of polling */ 132 return 1; /* XXX CCID should dequeue when ready instead of polling */
144} 133}
145 134
146static void ccid2_change_l_ack_ratio(struct sock *sk, int val) 135static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
147{ 136{
148 struct dccp_sock *dp = dccp_sk(sk); 137 struct dccp_sock *dp = dccp_sk(sk);
138 u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
139
149 /* 140 /*
150 * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio 141 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
151 * should be 1... it shouldn't be allowed to become 2. 142 * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
152 * -sorbo. 143 * acceptable since this causes starvation/deadlock whenever cwnd < 2.
144 * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
153 */ 145 */
154 if (val != 2) { 146 if (val == 0 || val > max_ratio) {
155 const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 147 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
156 int max = hctx->ccid2hctx_cwnd / 2; 148 val = max_ratio;
157
158 /* round up */
159 if (hctx->ccid2hctx_cwnd & 1)
160 max++;
161
162 if (val > max)
163 val = max;
164 } 149 }
150 if (val > 0xFFFF) /* RFC 4340, 11.3 */
151 val = 0xFFFF;
165 152
166 ccid2_pr_debug("changing local ack ratio to %d\n", val); 153 if (val == dp->dccps_l_ack_ratio)
167 WARN_ON(val <= 0); 154 return;
168 dp->dccps_l_ack_ratio = val;
169}
170 155
171static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) 156 ccid2_pr_debug("changing local ack ratio to %u\n", val);
172{ 157 dp->dccps_l_ack_ratio = val;
173 /* XXX do we need to change ack ratio? */
174 hctx->ccid2hctx_cwnd = val? : 1;
175 ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd);
176} 158}
177 159
178static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) 160static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
@@ -181,11 +163,6 @@ static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
181 hctx->ccid2hctx_srtt = val; 163 hctx->ccid2hctx_srtt = val;
182} 164}
183 165
184static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
185{
186 hctx->ccid2hctx_pipe = val;
187}
188
189static void ccid2_start_rto_timer(struct sock *sk); 166static void ccid2_start_rto_timer(struct sock *sk);
190 167
191static void ccid2_hc_tx_rto_expire(unsigned long data) 168static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -215,21 +192,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
215 ccid2_start_rto_timer(sk); 192 ccid2_start_rto_timer(sk);
216 193
217 /* adjust pipe, cwnd etc */ 194 /* adjust pipe, cwnd etc */
218 ccid2_change_pipe(hctx, 0); 195 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
219 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
220 if (hctx->ccid2hctx_ssthresh < 2) 196 if (hctx->ccid2hctx_ssthresh < 2)
221 hctx->ccid2hctx_ssthresh = 2; 197 hctx->ccid2hctx_ssthresh = 2;
222 ccid2_change_cwnd(hctx, 1); 198 hctx->ccid2hctx_cwnd = 1;
199 hctx->ccid2hctx_pipe = 0;
223 200
224 /* clear state about stuff we sent */ 201 /* clear state about stuff we sent */
225 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; 202 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
226 hctx->ccid2hctx_ssacks = 0; 203 hctx->ccid2hctx_packets_acked = 0;
227 hctx->ccid2hctx_acks = 0;
228 hctx->ccid2hctx_sent = 0;
229 204
230 /* clear ack ratio state. */ 205 /* clear ack ratio state. */
231 hctx->ccid2hctx_arsent = 0;
232 hctx->ccid2hctx_ackloss = 0;
233 hctx->ccid2hctx_rpseq = 0; 206 hctx->ccid2hctx_rpseq = 0;
234 hctx->ccid2hctx_rpdupack = -1; 207 hctx->ccid2hctx_rpdupack = -1;
235 ccid2_change_l_ack_ratio(sk, 1); 208 ccid2_change_l_ack_ratio(sk, 1);
@@ -255,23 +228,10 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
255 struct dccp_sock *dp = dccp_sk(sk); 228 struct dccp_sock *dp = dccp_sk(sk);
256 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 229 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
257 struct ccid2_seq *next; 230 struct ccid2_seq *next;
258 u64 seq;
259
260 ccid2_hc_tx_check_sanity(hctx);
261 231
262 BUG_ON(!hctx->ccid2hctx_sendwait); 232 hctx->ccid2hctx_pipe++;
263 hctx->ccid2hctx_sendwait = 0;
264 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
265 BUG_ON(hctx->ccid2hctx_pipe < 0);
266 233
267 /* There is an issue. What if another packet is sent between 234 hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
268 * packet_send() and packet_sent(). Then the sequence number would be
269 * wrong.
270 * -sorbo.
271 */
272 seq = dp->dccps_gss;
273
274 hctx->ccid2hctx_seqh->ccid2s_seq = seq;
275 hctx->ccid2hctx_seqh->ccid2s_acked = 0; 235 hctx->ccid2hctx_seqh->ccid2s_acked = 0;
276 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; 236 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
277 237
@@ -291,8 +251,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
291 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, 251 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
292 hctx->ccid2hctx_pipe); 252 hctx->ccid2hctx_pipe);
293 253
294 hctx->ccid2hctx_sent++; 254 /*
295 255 * FIXME: The code below is broken and the variables have been removed
256 * from the socket struct. The `ackloss' variable was always set to 0,
257 * and with arsent there are several problems:
258 * (i) it doesn't just count the number of Acks, but all sent packets;
259 * (ii) it is expressed in # of packets, not # of windows, so the
260 * comparison below uses the wrong formula: Appendix A of RFC 4341
261 * comes up with the number K = cwnd / (R^2 - R) of consecutive windows
262 * of data with no lost or marked Ack packets. If arsent were the # of
263 * consecutive Acks received without loss, then Ack Ratio needs to be
264 * decreased by 1 when
265 * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
266 * where cwnd / R is the number of Acks received per window of data
267 * (cf. RFC 4341, App. A). The problems are that
268 * - arsent counts other packets as well;
269 * - the comparison uses a formula different from RFC 4341;
270 * - computing a cubic/quadratic equation each time is too complicated.
271 * Hence a different algorithm is needed.
272 */
273#if 0
296 /* Ack Ratio. Need to maintain a concept of how many windows we sent */ 274 /* Ack Ratio. Need to maintain a concept of how many windows we sent */
297 hctx->ccid2hctx_arsent++; 275 hctx->ccid2hctx_arsent++;
298 /* We had an ack loss in this window... */ 276 /* We had an ack loss in this window... */
@@ -320,14 +298,13 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
320 hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ 298 hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
321 } 299 }
322 } 300 }
301#endif
323 302
324 /* setup RTO timer */ 303 /* setup RTO timer */
325 if (!timer_pending(&hctx->ccid2hctx_rtotimer)) 304 if (!timer_pending(&hctx->ccid2hctx_rtotimer))
326 ccid2_start_rto_timer(sk); 305 ccid2_start_rto_timer(sk);
327 306
328#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 307#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
329 ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
330 ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
331 do { 308 do {
332 struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; 309 struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
333 310
@@ -419,31 +396,15 @@ static inline void ccid2_new_ack(struct sock *sk,
419{ 396{
420 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 397 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
421 398
422 /* slow start */
423 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { 399 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
424 hctx->ccid2hctx_acks = 0; 400 if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
425 401 hctx->ccid2hctx_cwnd += 1;
426 /* We can increase cwnd at most maxincr [ack_ratio/2] */ 402 *maxincr -= 1;
427 if (*maxincr) { 403 hctx->ccid2hctx_packets_acked = 0;
428 /* increase every 2 acks */
429 hctx->ccid2hctx_ssacks++;
430 if (hctx->ccid2hctx_ssacks == 2) {
431 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
432 hctx->ccid2hctx_ssacks = 0;
433 *maxincr = *maxincr - 1;
434 }
435 } else {
436 /* increased cwnd enough for this single ack */
437 hctx->ccid2hctx_ssacks = 0;
438 }
439 } else {
440 hctx->ccid2hctx_ssacks = 0;
441 hctx->ccid2hctx_acks++;
442
443 if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
444 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
445 hctx->ccid2hctx_acks = 0;
446 } 404 }
405 } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
406 hctx->ccid2hctx_cwnd += 1;
407 hctx->ccid2hctx_packets_acked = 0;
447 } 408 }
448 409
449 /* update RTO */ 410 /* update RTO */
@@ -502,7 +463,6 @@ static inline void ccid2_new_ack(struct sock *sk,
502 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", 463 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
503 hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, 464 hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
504 hctx->ccid2hctx_rto, HZ, r); 465 hctx->ccid2hctx_rto, HZ, r);
505 hctx->ccid2hctx_sent = 0;
506 } 466 }
507 467
508 /* we got a new ack, so re-start RTO timer */ 468 /* we got a new ack, so re-start RTO timer */
@@ -514,16 +474,19 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
514{ 474{
515 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 475 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
516 476
517 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); 477 if (hctx->ccid2hctx_pipe == 0)
518 BUG_ON(hctx->ccid2hctx_pipe < 0); 478 DCCP_BUG("pipe == 0");
479 else
480 hctx->ccid2hctx_pipe--;
519 481
520 if (hctx->ccid2hctx_pipe == 0) 482 if (hctx->ccid2hctx_pipe == 0)
521 ccid2_hc_tx_kill_rto_timer(sk); 483 ccid2_hc_tx_kill_rto_timer(sk);
522} 484}
523 485
524static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, 486static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
525 struct ccid2_seq *seqp)
526{ 487{
488 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
489
527 if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { 490 if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
528 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 491 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
529 return; 492 return;
@@ -531,10 +494,12 @@ static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
531 494
532 hctx->ccid2hctx_last_cong = jiffies; 495 hctx->ccid2hctx_last_cong = jiffies;
533 496
534 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); 497 hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
535 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; 498 hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
536 if (hctx->ccid2hctx_ssthresh < 2) 499
537 hctx->ccid2hctx_ssthresh = 2; 500 /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
501 if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
502 ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
538} 503}
539 504
540static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 505static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -570,12 +535,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
570 hctx->ccid2hctx_rpdupack++; 535 hctx->ccid2hctx_rpdupack++;
571 536
572 /* check if we got enough dupacks */ 537 /* check if we got enough dupacks */
573 if (hctx->ccid2hctx_rpdupack >= 538 if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
574 hctx->ccid2hctx_numdupack) {
575 hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ 539 hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
576 hctx->ccid2hctx_rpseq = 0; 540 hctx->ccid2hctx_rpseq = 0;
577 541
578 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1); 542 ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
579 } 543 }
580 } 544 }
581 } 545 }
@@ -606,12 +570,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
606 } 570 }
607 } 571 }
608 572
609 /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for 573 /*
610 * this single ack. I round up. 574 * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
611 * -sorbo. 575 * packets per acknowledgement. Rounding up avoids that cwnd is not
576 * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
612 */ 577 */
613 maxincr = dp->dccps_l_ack_ratio >> 1; 578 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
614 maxincr++; 579 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
615 580
616 /* go through all ack vectors */ 581 /* go through all ack vectors */
617 while ((offset = ccid2_ackvector(sk, skb, offset, 582 while ((offset = ccid2_ackvector(sk, skb, offset,
@@ -619,9 +584,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
619 /* go through this ack vector */ 584 /* go through this ack vector */
620 while (veclen--) { 585 while (veclen--) {
621 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 586 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
622 u64 ackno_end_rl; 587 u64 ackno_end_rl = SUB48(ackno, rl);
623 588
624 dccp_set_seqno(&ackno_end_rl, ackno - rl);
625 ccid2_pr_debug("ackvec start:%llu end:%llu\n", 589 ccid2_pr_debug("ackvec start:%llu end:%llu\n",
626 (unsigned long long)ackno, 590 (unsigned long long)ackno,
627 (unsigned long long)ackno_end_rl); 591 (unsigned long long)ackno_end_rl);
@@ -651,7 +615,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
651 !seqp->ccid2s_acked) { 615 !seqp->ccid2s_acked) {
652 if (state == 616 if (state ==
653 DCCP_ACKVEC_STATE_ECN_MARKED) { 617 DCCP_ACKVEC_STATE_ECN_MARKED) {
654 ccid2_congestion_event(hctx, 618 ccid2_congestion_event(sk,
655 seqp); 619 seqp);
656 } else 620 } else
657 ccid2_new_ack(sk, seqp, 621 ccid2_new_ack(sk, seqp,
@@ -666,13 +630,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
666 done = 1; 630 done = 1;
667 break; 631 break;
668 } 632 }
669 seqp = seqp->ccid2s_next; 633 seqp = seqp->ccid2s_prev;
670 } 634 }
671 if (done) 635 if (done)
672 break; 636 break;
673 637
674 638 ackno = SUB48(ackno_end_rl, 1);
675 dccp_set_seqno(&ackno, ackno_end_rl - 1);
676 vector++; 639 vector++;
677 } 640 }
678 if (done) 641 if (done)
@@ -694,7 +657,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
694 while (1) { 657 while (1) {
695 if (seqp->ccid2s_acked) { 658 if (seqp->ccid2s_acked) {
696 done++; 659 done++;
697 if (done == hctx->ccid2hctx_numdupack) 660 if (done == NUMDUPACK)
698 break; 661 break;
699 } 662 }
700 if (seqp == hctx->ccid2hctx_seqt) 663 if (seqp == hctx->ccid2hctx_seqt)
@@ -705,7 +668,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
705 /* If there are at least 3 acknowledgements, anything unacknowledged 668 /* If there are at least 3 acknowledgements, anything unacknowledged
706 * below the last sequence number is considered lost 669 * below the last sequence number is considered lost
707 */ 670 */
708 if (done == hctx->ccid2hctx_numdupack) { 671 if (done == NUMDUPACK) {
709 struct ccid2_seq *last_acked = seqp; 672 struct ccid2_seq *last_acked = seqp;
710 673
711 /* check for lost packets */ 674 /* check for lost packets */
@@ -717,7 +680,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
717 * order to detect multiple congestion events in 680 * order to detect multiple congestion events in
718 * one ack vector. 681 * one ack vector.
719 */ 682 */
720 ccid2_congestion_event(hctx, seqp); 683 ccid2_congestion_event(sk, seqp);
721 ccid2_hc_tx_dec_pipe(sk); 684 ccid2_hc_tx_dec_pipe(sk);
722 } 685 }
723 if (seqp == hctx->ccid2hctx_seqt) 686 if (seqp == hctx->ccid2hctx_seqt)
@@ -742,14 +705,23 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
742static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 705static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
743{ 706{
744 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); 707 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
708 struct dccp_sock *dp = dccp_sk(sk);
709 u32 max_ratio;
710
711 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
712 hctx->ccid2hctx_ssthresh = ~0U;
745 713
746 ccid2_change_cwnd(hctx, 1); 714 /*
747 /* Initialize ssthresh to infinity. This means that we will exit the 715 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
748 * initial slow-start after the first packet loss. This is what we 716 * packets for new connections, following the rules from [RFC3390]".
749 * want. 717 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
750 */ 718 */
751 hctx->ccid2hctx_ssthresh = ~0; 719 hctx->ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp->dccps_mss_cache));
752 hctx->ccid2hctx_numdupack = 3; 720
721 /* Make sure that Ack Ratio is enabled and within bounds. */
722 max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
723 if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
724 dp->dccps_l_ack_ratio = max_ratio;
753 725
754 /* XXX init ~ to window size... */ 726 /* XXX init ~ to window size... */
755 if (ccid2_hc_tx_alloc_seq(hctx)) 727 if (ccid2_hc_tx_alloc_seq(hctx))
@@ -760,10 +732,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
760 hctx->ccid2hctx_rttvar = -1; 732 hctx->ccid2hctx_rttvar = -1;
761 hctx->ccid2hctx_rpdupack = -1; 733 hctx->ccid2hctx_rpdupack = -1;
762 hctx->ccid2hctx_last_cong = jiffies; 734 hctx->ccid2hctx_last_cong = jiffies;
763 735 setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
764 hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; 736 (unsigned long)sk);
765 hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
766 init_timer(&hctx->ccid2hctx_rtotimer);
767 737
768 ccid2_hc_tx_check_sanity(hctx); 738 ccid2_hc_tx_check_sanity(hctx);
769 return 0; 739 return 0;
@@ -800,7 +770,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
800 770
801static struct ccid_operations ccid2 = { 771static struct ccid_operations ccid2 = {
802 .ccid_id = DCCPC_CCID2, 772 .ccid_id = DCCPC_CCID2,
803 .ccid_name = "ccid2", 773 .ccid_name = "TCP-like",
804 .ccid_owner = THIS_MODULE, 774 .ccid_owner = THIS_MODULE,
805 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), 775 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
806 .ccid_hc_tx_init = ccid2_hc_tx_init, 776 .ccid_hc_tx_init = ccid2_hc_tx_init,
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d9daa534c9be..2c94ca029010 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -24,6 +24,8 @@
24#include <linux/timer.h> 24#include <linux/timer.h>
25#include <linux/types.h> 25#include <linux/types.h>
26#include "../ccid.h" 26#include "../ccid.h"
27/* NUMDUPACK parameter from RFC 4341, p. 6 */
28#define NUMDUPACK 3
27 29
28struct sock; 30struct sock;
29 31
@@ -40,22 +42,17 @@ struct ccid2_seq {
40 42
41/** struct ccid2_hc_tx_sock - CCID2 TX half connection 43/** struct ccid2_hc_tx_sock - CCID2 TX half connection
42 * 44 *
43 * @ccid2hctx_ssacks - ACKs recv in slow start 45 * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @ccid2hctx_acks - ACKS recv in AI phase 46 * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
45 * @ccid2hctx_sent - packets sent in this window
46 * @ccid2hctx_lastrtt -time RTT was last measured 47 * @ccid2hctx_lastrtt -time RTT was last measured
47 * @ccid2hctx_arsent - packets sent [ack ratio]
48 * @ccid2hctx_ackloss - ack was lost in this win
49 * @ccid2hctx_rpseq - last consecutive seqno 48 * @ccid2hctx_rpseq - last consecutive seqno
50 * @ccid2hctx_rpdupack - dupacks since rpseq 49 * @ccid2hctx_rpdupack - dupacks since rpseq
51*/ 50*/
52struct ccid2_hc_tx_sock { 51struct ccid2_hc_tx_sock {
53 u32 ccid2hctx_cwnd; 52 u32 ccid2hctx_cwnd;
54 int ccid2hctx_ssacks; 53 u32 ccid2hctx_ssthresh;
55 int ccid2hctx_acks; 54 u32 ccid2hctx_pipe;
56 unsigned int ccid2hctx_ssthresh; 55 u32 ccid2hctx_packets_acked;
57 int ccid2hctx_pipe;
58 int ccid2hctx_numdupack;
59 struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; 56 struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
60 int ccid2hctx_seqbufc; 57 int ccid2hctx_seqbufc;
61 struct ccid2_seq *ccid2hctx_seqh; 58 struct ccid2_seq *ccid2hctx_seqh;
@@ -63,14 +60,10 @@ struct ccid2_hc_tx_sock {
63 long ccid2hctx_rto; 60 long ccid2hctx_rto;
64 long ccid2hctx_srtt; 61 long ccid2hctx_srtt;
65 long ccid2hctx_rttvar; 62 long ccid2hctx_rttvar;
66 int ccid2hctx_sent;
67 unsigned long ccid2hctx_lastrtt; 63 unsigned long ccid2hctx_lastrtt;
68 struct timer_list ccid2hctx_rtotimer; 64 struct timer_list ccid2hctx_rtotimer;
69 unsigned long ccid2hctx_arsent;
70 int ccid2hctx_ackloss;
71 u64 ccid2hctx_rpseq; 65 u64 ccid2hctx_rpseq;
72 int ccid2hctx_rpdupack; 66 int ccid2hctx_rpdupack;
73 int ccid2hctx_sendwait;
74 unsigned long ccid2hctx_last_cong; 67 unsigned long ccid2hctx_last_cong;
75 u64 ccid2hctx_high_ack; 68 u64 ccid2hctx_high_ack;
76}; 69};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 19b33586333d..e76f460af0ea 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dccp/ccids/ccid3.c 2 * net/dccp/ccids/ccid3.c
3 * 3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 5 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> 6 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 * 7 *
@@ -33,11 +34,7 @@
33 * along with this program; if not, write to the Free Software 34 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */ 36 */
36#include "../ccid.h"
37#include "../dccp.h" 37#include "../dccp.h"
38#include "lib/packet_history.h"
39#include "lib/loss_interval.h"
40#include "lib/tfrc.h"
41#include "ccid3.h" 38#include "ccid3.h"
42 39
43#include <asm/unaligned.h> 40#include <asm/unaligned.h>
@@ -49,9 +46,6 @@ static int ccid3_debug;
49#define ccid3_pr_debug(format, a...) 46#define ccid3_pr_debug(format, a...)
50#endif 47#endif
51 48
52static struct dccp_tx_hist *ccid3_tx_hist;
53static struct dccp_rx_hist *ccid3_rx_hist;
54
55/* 49/*
56 * Transmitter Half-Connection Routines 50 * Transmitter Half-Connection Routines
57 */ 51 */
@@ -83,24 +77,27 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
83} 77}
84 78
85/* 79/*
86 * Compute the initial sending rate X_init according to RFC 3390: 80 * Compute the initial sending rate X_init in the manner of RFC 3390:
87 * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes)) 81 *
88 * X_init = w_init / RTT 82 * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT
83 *
84 * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
85 * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
89 * For consistency with other parts of the code, X_init is scaled by 2^6. 86 * For consistency with other parts of the code, X_init is scaled by 2^6.
90 */ 87 */
91static inline u64 rfc3390_initial_rate(struct sock *sk) 88static inline u64 rfc3390_initial_rate(struct sock *sk)
92{ 89{
93 const struct dccp_sock *dp = dccp_sk(sk); 90 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
94 const __u32 w_init = min(4 * dp->dccps_mss_cache, 91 const __u32 w_init = min_t(__u32, 4 * hctx->ccid3hctx_s,
95 max(2 * dp->dccps_mss_cache, 4380U)); 92 max_t(__u32, 2 * hctx->ccid3hctx_s, 4380));
96 93
97 return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt); 94 return scaled_div(w_init << 6, hctx->ccid3hctx_rtt);
98} 95}
99 96
100/* 97/*
101 * Recalculate t_ipi and delta (should be called whenever X changes) 98 * Recalculate t_ipi and delta (should be called whenever X changes)
102 */ 99 */
103static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) 100static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
104{ 101{
105 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ 102 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
106 hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, 103 hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
@@ -116,6 +113,13 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
116 113
117} 114}
118 115
116static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
117{
118 u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
119
120 return delta / hctx->ccid3hctx_rtt;
121}
122
119/** 123/**
120 * ccid3_hc_tx_update_x - Update allowed sending rate X 124 * ccid3_hc_tx_update_x - Update allowed sending rate X
121 * @stamp: most recent time if available - can be left NULL. 125 * @stamp: most recent time if available - can be left NULL.
@@ -127,19 +131,19 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
127 * 131 *
128 */ 132 */
129static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) 133static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
130
131{ 134{
132 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 135 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
133 __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; 136 __u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
134 const __u64 old_x = hctx->ccid3hctx_x; 137 const __u64 old_x = hctx->ccid3hctx_x;
135 ktime_t now = stamp? *stamp : ktime_get_real(); 138 ktime_t now = stamp ? *stamp : ktime_get_real();
136 139
137 /* 140 /*
138 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate 141 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
139 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis. 142 * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis:
143 * a sender is idle if it has not sent anything over a 2-RTT-period.
140 * For consistency with X and X_recv, min_rate is also scaled by 2^6. 144 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
141 */ 145 */
142 if (unlikely(hctx->ccid3hctx_idle)) { 146 if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) {
143 min_rate = rfc3390_initial_rate(sk); 147 min_rate = rfc3390_initial_rate(sk);
144 min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); 148 min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
145 } 149 }
@@ -181,7 +185,7 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
181{ 185{
182 const u16 old_s = hctx->ccid3hctx_s; 186 const u16 old_s = hctx->ccid3hctx_s;
183 187
184 hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10; 188 hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9);
185 189
186 if (hctx->ccid3hctx_s != old_s) 190 if (hctx->ccid3hctx_s != old_s)
187 ccid3_update_send_interval(hctx); 191 ccid3_update_send_interval(hctx);
@@ -225,29 +229,27 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
225 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, 229 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
226 ccid3_tx_state_name(hctx->ccid3hctx_state)); 230 ccid3_tx_state_name(hctx->ccid3hctx_state));
227 231
228 hctx->ccid3hctx_idle = 1; 232 if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
233 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
234 else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
235 goto out;
229 236
230 switch (hctx->ccid3hctx_state) { 237 /*
231 case TFRC_SSTATE_NO_FBACK: 238 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
232 /* RFC 3448, 4.4: Halve send rate directly */ 239 */
240 if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */
241 hctx->ccid3hctx_p == 0) {
242
243 /* halve send rate directly */
233 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, 244 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
234 (((__u64)hctx->ccid3hctx_s) << 6) / 245 (((__u64)hctx->ccid3hctx_s) << 6) /
235 TFRC_T_MBI); 246 TFRC_T_MBI);
236
237 ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
238 "bytes/s\n", dccp_role(sk), sk,
239 ccid3_tx_state_name(hctx->ccid3hctx_state),
240 (unsigned)(hctx->ccid3hctx_x >> 6));
241 /* The value of R is still undefined and so we can not recompute
242 * the timout value. Keep initial value as per [RFC 4342, 5]. */
243 t_nfb = TFRC_INITIAL_TIMEOUT;
244 ccid3_update_send_interval(hctx); 247 ccid3_update_send_interval(hctx);
245 break; 248 } else {
246 case TFRC_SSTATE_FBACK:
247 /* 249 /*
248 * Modify the cached value of X_recv [RFC 3448, 4.4] 250 * Modify the cached value of X_recv
249 * 251 *
250 * If (p == 0 || X_calc > 2 * X_recv) 252 * If (X_calc > 2 * X_recv)
251 * X_recv = max(X_recv / 2, s / (2 * t_mbi)); 253 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
252 * Else 254 * Else
253 * X_recv = X_calc / 4; 255 * X_recv = X_calc / 4;
@@ -256,32 +258,28 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 */ 258 */
257 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); 259 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
258 260
259 if (hctx->ccid3hctx_p == 0 || 261 if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
260 (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
261
262 hctx->ccid3hctx_x_recv = 262 hctx->ccid3hctx_x_recv =
263 max(hctx->ccid3hctx_x_recv / 2, 263 max(hctx->ccid3hctx_x_recv / 2,
264 (((__u64)hctx->ccid3hctx_s) << 6) / 264 (((__u64)hctx->ccid3hctx_s) << 6) /
265 (2 * TFRC_T_MBI)); 265 (2 * TFRC_T_MBI));
266 } else { 266 else {
267 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; 267 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
268 hctx->ccid3hctx_x_recv <<= 4; 268 hctx->ccid3hctx_x_recv <<= 4;
269 } 269 }
270 /* Now recalculate X [RFC 3448, 4.3, step (4)] */
271 ccid3_hc_tx_update_x(sk, NULL); 270 ccid3_hc_tx_update_x(sk, NULL);
272 /*
273 * Schedule no feedback timer to expire in
274 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
275 * See comments in packet_recv() regarding the value of t_RTO.
276 */
277 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
278 break;
279 case TFRC_SSTATE_NO_SENT:
280 DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
281 /* fall through */
282 case TFRC_SSTATE_TERM:
283 goto out;
284 } 271 }
272 ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
273 (unsigned long long)hctx->ccid3hctx_x);
274
275 /*
276 * Set new timeout for the nofeedback timer.
277 * See comments in packet_recv() regarding the value of t_RTO.
278 */
279 if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */
280 t_nfb = TFRC_INITIAL_TIMEOUT;
281 else
282 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
285 283
286restart_timer: 284restart_timer:
287 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 285 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
@@ -336,8 +334,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
336 hctx->ccid3hctx_x = rfc3390_initial_rate(sk); 334 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
337 hctx->ccid3hctx_t_ld = now; 335 hctx->ccid3hctx_t_ld = now;
338 } else { 336 } else {
339 /* Sender does not have RTT sample: X = MSS/second */ 337 /* Sender does not have RTT sample: X_pps = 1 pkt/sec */
340 hctx->ccid3hctx_x = dp->dccps_mss_cache; 338 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
341 hctx->ccid3hctx_x <<= 6; 339 hctx->ccid3hctx_x <<= 6;
342 } 340 }
343 ccid3_update_send_interval(hctx); 341 ccid3_update_send_interval(hctx);
@@ -369,7 +367,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
369 /* prepare to send now (add options etc.) */ 367 /* prepare to send now (add options etc.) */
370 dp->dccps_hc_tx_insert_options = 1; 368 dp->dccps_hc_tx_insert_options = 1;
371 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; 369 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
372 hctx->ccid3hctx_idle = 0;
373 370
374 /* set the nominal send time for the next following packet */ 371 /* set the nominal send time for the next following packet */
375 hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, 372 hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
@@ -381,28 +378,17 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
381 unsigned int len) 378 unsigned int len)
382{ 379{
383 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 380 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
384 struct dccp_tx_hist_entry *packet;
385 381
386 ccid3_hc_tx_update_s(hctx, len); 382 ccid3_hc_tx_update_s(hctx, len);
387 383
388 packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); 384 if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss))
389 if (unlikely(packet == NULL)) {
390 DCCP_CRIT("packet history - out of memory!"); 385 DCCP_CRIT("packet history - out of memory!");
391 return;
392 }
393 dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
394
395 packet->dccphtx_tstamp = ktime_get_real();
396 packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
397 packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
398 packet->dccphtx_sent = 1;
399} 386}
400 387
401static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 388static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
402{ 389{
403 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 390 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
404 struct ccid3_options_received *opt_recv; 391 struct ccid3_options_received *opt_recv;
405 struct dccp_tx_hist_entry *packet;
406 ktime_t now; 392 ktime_t now;
407 unsigned long t_nfb; 393 unsigned long t_nfb;
408 u32 pinv, r_sample; 394 u32 pinv, r_sample;
@@ -411,131 +397,112 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
411 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 397 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
412 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 398 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
413 return; 399 return;
400 /* ... and only in the established state */
401 if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
402 hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
403 return;
414 404
415 opt_recv = &hctx->ccid3hctx_options_received; 405 opt_recv = &hctx->ccid3hctx_options_received;
406 now = ktime_get_real();
416 407
417 switch (hctx->ccid3hctx_state) { 408 /* Estimate RTT from history if ACK number is valid */
418 case TFRC_SSTATE_NO_FBACK: 409 r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
419 case TFRC_SSTATE_FBACK: 410 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
420 /* get packet from history to look up t_recvdata */ 411 if (r_sample == 0) {
421 packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, 412 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
422 DCCP_SKB_CB(skb)->dccpd_ack_seq); 413 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
423 if (unlikely(packet == NULL)) { 414 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
424 DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " 415 return;
425 "in history!\n", dccp_role(sk), sk, 416 }
426 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
427 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
428 return;
429 }
430
431 /* Update receive rate in units of 64 * bytes/second */
432 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
433 hctx->ccid3hctx_x_recv <<= 6;
434 417
435 /* Update loss event rate */ 418 /* Update receive rate in units of 64 * bytes/second */
436 pinv = opt_recv->ccid3or_loss_event_rate; 419 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
437 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ 420 hctx->ccid3hctx_x_recv <<= 6;
438 hctx->ccid3hctx_p = 0;
439 else /* can not exceed 100% */
440 hctx->ccid3hctx_p = 1000000 / pinv;
441 421
442 now = ktime_get_real(); 422 /* Update loss event rate (which is scaled by 1e6) */
443 /* 423 pinv = opt_recv->ccid3or_loss_event_rate;
444 * Calculate new round trip sample as per [RFC 3448, 4.3] by 424 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
445 * R_sample = (now - t_recvdata) - t_elapsed 425 hctx->ccid3hctx_p = 0;
446 */ 426 else /* can not exceed 100% */
447 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); 427 hctx->ccid3hctx_p = scaled_div(1, pinv);
428 /*
429 * Validate new RTT sample and update moving average
430 */
431 r_sample = dccp_sample_rtt(sk, r_sample);
432 hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
433 /*
434 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
435 */
436 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
437 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
448 438
449 /* 439 if (hctx->ccid3hctx_t_rto == 0) {
450 * Update RTT estimate by
451 * If (No feedback recv)
452 * R = R_sample;
453 * Else
454 * R = q * R + (1 - q) * R_sample;
455 *
456 * q is a constant, RFC 3448 recomments 0.9
457 */
458 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
459 /* 440 /*
460 * Larger Initial Windows [RFC 4342, sec. 5] 441 * Initial feedback packet: Larger Initial Windows (4.2)
461 */ 442 */
462 hctx->ccid3hctx_rtt = r_sample;
463 hctx->ccid3hctx_x = rfc3390_initial_rate(sk); 443 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
464 hctx->ccid3hctx_t_ld = now; 444 hctx->ccid3hctx_t_ld = now;
465 445
466 ccid3_update_send_interval(hctx); 446 ccid3_update_send_interval(hctx);
467 447
468 ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " 448 goto done_computing_x;
469 "R_sample=%uus, X=%u\n", dccp_role(sk), 449 } else if (hctx->ccid3hctx_p == 0) {
470 sk, hctx->ccid3hctx_s, 450 /*
471 dccp_sk(sk)->dccps_mss_cache, r_sample, 451 * First feedback after nofeedback timer expiry (4.3)
472 (unsigned)(hctx->ccid3hctx_x >> 6)); 452 */
473 453 goto done_computing_x;
474 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
475 } else {
476 hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
477 r_sample) / 10;
478
479 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
480 if (hctx->ccid3hctx_p > 0)
481 hctx->ccid3hctx_x_calc =
482 tfrc_calc_x(hctx->ccid3hctx_s,
483 hctx->ccid3hctx_rtt,
484 hctx->ccid3hctx_p);
485 ccid3_hc_tx_update_x(sk, &now);
486
487 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
488 "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
489 dccp_role(sk),
490 sk, hctx->ccid3hctx_rtt, r_sample,
491 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
492 hctx->ccid3hctx_x_calc,
493 (unsigned)(hctx->ccid3hctx_x_recv >> 6),
494 (unsigned)(hctx->ccid3hctx_x >> 6));
495 } 454 }
455 }
496 456
497 /* unschedule no feedback timer */ 457 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
498 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 458 if (hctx->ccid3hctx_p > 0)
459 hctx->ccid3hctx_x_calc =
460 tfrc_calc_x(hctx->ccid3hctx_s,
461 hctx->ccid3hctx_rtt,
462 hctx->ccid3hctx_p);
463 ccid3_hc_tx_update_x(sk, &now);
464
465done_computing_x:
466 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
467 "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
468 dccp_role(sk),
469 sk, hctx->ccid3hctx_rtt, r_sample,
470 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
471 hctx->ccid3hctx_x_calc,
472 (unsigned)(hctx->ccid3hctx_x_recv >> 6),
473 (unsigned)(hctx->ccid3hctx_x >> 6));
499 474
500 /* remove all packets older than the one acked from history */ 475 /* unschedule no feedback timer */
501 dccp_tx_hist_purge_older(ccid3_tx_hist, 476 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
502 &hctx->ccid3hctx_hist, packet);
503 /*
504 * As we have calculated new ipi, delta, t_nom it is possible
505 * that we now can send a packet, so wake up dccp_wait_for_ccid
506 */
507 sk->sk_write_space(sk);
508 477
509 /* 478 /*
510 * Update timeout interval for the nofeedback timer. 479 * As we have calculated new ipi, delta, t_nom it is possible
511 * We use a configuration option to increase the lower bound. 480 * that we now can send a packet, so wake up dccp_wait_for_ccid
512 * This can help avoid triggering the nofeedback timer too 481 */
513 * often ('spinning') on LANs with small RTTs. 482 sk->sk_write_space(sk);
514 */
515 hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
516 CONFIG_IP_DCCP_CCID3_RTO *
517 (USEC_PER_SEC/1000));
518 /*
519 * Schedule no feedback timer to expire in
520 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
521 */
522 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
523 483
524 ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " 484 /*
525 "expire in %lu jiffies (%luus)\n", 485 * Update timeout interval for the nofeedback timer.
526 dccp_role(sk), 486 * We use a configuration option to increase the lower bound.
527 sk, usecs_to_jiffies(t_nfb), t_nfb); 487 * This can help avoid triggering the nofeedback timer too
488 * often ('spinning') on LANs with small RTTs.
489 */
490 hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
491 (CONFIG_IP_DCCP_CCID3_RTO *
492 (USEC_PER_SEC / 1000)));
493 /*
494 * Schedule no feedback timer to expire in
495 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
496 */
497 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
528 498
529 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 499 ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
530 jiffies + usecs_to_jiffies(t_nfb)); 500 "expire in %lu jiffies (%luus)\n",
501 dccp_role(sk),
502 sk, usecs_to_jiffies(t_nfb), t_nfb);
531 503
532 /* set idle flag */ 504 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
533 hctx->ccid3hctx_idle = 1; 505 jiffies + usecs_to_jiffies(t_nfb));
534 break;
535 case TFRC_SSTATE_NO_SENT: /* fall through */
536 case TFRC_SSTATE_TERM: /* ignore feedback when closing */
537 break;
538 }
539} 506}
540 507
541static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 508static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
@@ -605,12 +572,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
605 struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); 572 struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
606 573
607 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; 574 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
608 INIT_LIST_HEAD(&hctx->ccid3hctx_hist); 575 hctx->ccid3hctx_hist = NULL;
609 576 setup_timer(&hctx->ccid3hctx_no_feedback_timer,
610 hctx->ccid3hctx_no_feedback_timer.function = 577 ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
611 ccid3_hc_tx_no_feedback_timer;
612 hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
613 init_timer(&hctx->ccid3hctx_no_feedback_timer);
614 578
615 return 0; 579 return 0;
616} 580}
@@ -622,8 +586,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
622 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); 586 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
623 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 587 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
624 588
625 /* Empty packet history */ 589 tfrc_tx_hist_purge(&hctx->ccid3hctx_hist);
626 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
627} 590}
628 591
629static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 592static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -670,6 +633,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
670/* 633/*
671 * Receiver Half-Connection Routines 634 * Receiver Half-Connection Routines
672 */ 635 */
636
637/* CCID3 feedback types */
638enum ccid3_fback_type {
639 CCID3_FBACK_NONE = 0,
640 CCID3_FBACK_INITIAL,
641 CCID3_FBACK_PERIODIC,
642 CCID3_FBACK_PARAM_CHANGE
643};
644
673#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 645#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
674static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 646static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
675{ 647{
@@ -696,67 +668,58 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
696 hcrx->ccid3hcrx_state = state; 668 hcrx->ccid3hcrx_state = state;
697} 669}
698 670
699static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) 671static void ccid3_hc_rx_send_feedback(struct sock *sk,
700{ 672 const struct sk_buff *skb,
701 if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */ 673 enum ccid3_fback_type fbtype)
702 ccid3_pr_debug("Packet payload length is 0 - not updating\n");
703 else
704 hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len :
705 (9 * hcrx->ccid3hcrx_s + len) / 10;
706}
707
708static void ccid3_hc_rx_send_feedback(struct sock *sk)
709{ 674{
710 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 675 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
711 struct dccp_sock *dp = dccp_sk(sk); 676 struct dccp_sock *dp = dccp_sk(sk);
712 struct dccp_rx_hist_entry *packet;
713 ktime_t now; 677 ktime_t now;
714 suseconds_t delta; 678 s64 delta = 0;
715 679
716 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); 680 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
681 return;
717 682
718 now = ktime_get_real(); 683 now = ktime_get_real();
719 684
720 switch (hcrx->ccid3hcrx_state) { 685 switch (fbtype) {
721 case TFRC_RSTATE_NO_DATA: 686 case CCID3_FBACK_INITIAL:
722 hcrx->ccid3hcrx_x_recv = 0; 687 hcrx->ccid3hcrx_x_recv = 0;
688 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
723 break; 689 break;
724 case TFRC_RSTATE_DATA: 690 case CCID3_FBACK_PARAM_CHANGE:
725 delta = ktime_us_delta(now, 691 /*
726 hcrx->ccid3hcrx_tstamp_last_feedback); 692 * When parameters change (new loss or p > p_prev), we do not
727 DCCP_BUG_ON(delta < 0); 693 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
728 hcrx->ccid3hcrx_x_recv = 694 * need to reuse the previous value of X_recv. However, when
729 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); 695 * X_recv was 0 (due to early loss), this would kill X down to
696 * s/t_mbi (i.e. one packet in 64 seconds).
697 * To avoid such drastic reduction, we approximate X_recv as
698 * the number of bytes since last feedback.
699 * This is a safe fallback, since X is bounded above by X_calc.
700 */
701 if (hcrx->ccid3hcrx_x_recv > 0)
702 break;
703 /* fall through */
704 case CCID3_FBACK_PERIODIC:
705 delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
706 if (delta <= 0)
707 DCCP_BUG("delta (%ld) <= 0", (long)delta);
708 else
709 hcrx->ccid3hcrx_x_recv =
710 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
730 break; 711 break;
731 case TFRC_RSTATE_TERM: 712 default:
732 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
733 return; 713 return;
734 } 714 }
735 715
736 packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); 716 ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
737 if (unlikely(packet == NULL)) { 717 hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
738 DCCP_WARN("%s(%p), no data packet in history!\n",
739 dccp_role(sk), sk);
740 return;
741 }
742 718
743 hcrx->ccid3hcrx_tstamp_last_feedback = now; 719 hcrx->ccid3hcrx_tstamp_last_feedback = now;
744 hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; 720 hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval;
745 hcrx->ccid3hcrx_bytes_recv = 0; 721 hcrx->ccid3hcrx_bytes_recv = 0;
746 722
747 /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
748 delta = ktime_us_delta(now, packet->dccphrx_tstamp);
749 DCCP_BUG_ON(delta < 0);
750 hcrx->ccid3hcrx_elapsed_time = delta / 10;
751
752 if (hcrx->ccid3hcrx_p == 0)
753 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
754 else if (hcrx->ccid3hcrx_p > 1000000) {
755 DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
756 hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
757 } else
758 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
759
760 dp->dccps_hc_rx_insert_options = 1; 723 dp->dccps_hc_rx_insert_options = 1;
761 dccp_send_ack(sk); 724 dccp_send_ack(sk);
762} 725}
@@ -770,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
770 return 0; 733 return 0;
771 734
772 hcrx = ccid3_hc_rx_sk(sk); 735 hcrx = ccid3_hc_rx_sk(sk);
773 DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
774 736
775 if (dccp_packet_without_ack(skb)) 737 if (dccp_packet_without_ack(skb))
776 return 0; 738 return 0;
@@ -778,11 +740,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
778 x_recv = htonl(hcrx->ccid3hcrx_x_recv); 740 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
779 pinv = htonl(hcrx->ccid3hcrx_pinv); 741 pinv = htonl(hcrx->ccid3hcrx_pinv);
780 742
781 if ((hcrx->ccid3hcrx_elapsed_time != 0 && 743 if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
782 dccp_insert_option_elapsed_time(sk, skb,
783 hcrx->ccid3hcrx_elapsed_time)) ||
784 dccp_insert_option_timestamp(sk, skb) ||
785 dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
786 &pinv, sizeof(pinv)) || 744 &pinv, sizeof(pinv)) ||
787 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, 745 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
788 &x_recv, sizeof(x_recv))) 746 &x_recv, sizeof(x_recv)))
@@ -791,180 +749,139 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
791 return 0; 749 return 0;
792} 750}
793 751
794static int ccid3_hc_rx_detect_loss(struct sock *sk, 752/** ccid3_first_li - Implements [RFC 3448, 6.3.1]
795 struct dccp_rx_hist_entry *packet) 753 *
754 * Determine the length of the first loss interval via inverse lookup.
755 * Assume that X_recv can be computed by the throughput equation
756 * s
757 * X_recv = --------
758 * R * fval
759 * Find some p such that f(p) = fval; return 1/p (scaled).
760 */
761static u32 ccid3_first_li(struct sock *sk)
796{ 762{
797 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 763 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
798 struct dccp_rx_hist_entry *rx_hist = 764 u32 x_recv, p, delta;
799 dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); 765 u64 fval;
800 u64 seqno = packet->dccphrx_seqno;
801 u64 tmp_seqno;
802 int loss = 0;
803 u8 ccval;
804
805
806 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
807 766
808 if (!rx_hist || 767 if (hcrx->ccid3hcrx_rtt == 0) {
809 follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 768 DCCP_WARN("No RTT estimate available, using fallback RTT\n");
810 hcrx->ccid3hcrx_seqno_nonloss = seqno; 769 hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
811 hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
812 goto detect_out;
813 } 770 }
814 771
815 772 delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
816 while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) 773 x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
817 > TFRC_RECV_NUM_LATE_LOSS) { 774 if (x_recv == 0) { /* would also trigger divide-by-zero */
818 loss = 1; 775 DCCP_WARN("X_recv==0\n");
819 dccp_li_update_li(sk, 776 if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
820 &hcrx->ccid3hcrx_li_hist, 777 DCCP_BUG("stored value of X_recv is zero");
821 &hcrx->ccid3hcrx_hist, 778 return ~0U;
822 hcrx->ccid3hcrx_tstamp_last_feedback,
823 hcrx->ccid3hcrx_s,
824 hcrx->ccid3hcrx_bytes_recv,
825 hcrx->ccid3hcrx_x_recv,
826 hcrx->ccid3hcrx_seqno_nonloss,
827 hcrx->ccid3hcrx_ccval_nonloss);
828 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
829 dccp_inc_seqno(&tmp_seqno);
830 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
831 dccp_inc_seqno(&tmp_seqno);
832 while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
833 tmp_seqno, &ccval)) {
834 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
835 hcrx->ccid3hcrx_ccval_nonloss = ccval;
836 dccp_inc_seqno(&tmp_seqno);
837 } 779 }
838 } 780 }
839 781
840 /* FIXME - this code could be simplified with above while */ 782 fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
841 /* but works at moment */ 783 fval = scaled_div32(fval, x_recv);
842 if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 784 p = tfrc_calc_x_reverse_lookup(fval);
843 hcrx->ccid3hcrx_seqno_nonloss = seqno;
844 hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
845 }
846 785
847detect_out: 786 ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
848 dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, 787 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
849 &hcrx->ccid3hcrx_li_hist, packet, 788
850 hcrx->ccid3hcrx_seqno_nonloss); 789 return p == 0 ? ~0U : scaled_div(1, p);
851 return loss;
852} 790}
853 791
854static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) 792static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
855{ 793{
856 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 794 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
857 const struct dccp_options_received *opt_recv; 795 enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
858 struct dccp_rx_hist_entry *packet; 796 const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
859 u32 p_prev, r_sample, rtt_prev; 797 const bool is_data_packet = dccp_data_packet(skb);
860 int loss, payload_size; 798
861 ktime_t now; 799 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
862 800 if (is_data_packet) {
863 opt_recv = &dccp_sk(sk)->dccps_options_received; 801 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
864 802 do_feedback = CCID3_FBACK_INITIAL;
865 switch (DCCP_SKB_CB(skb)->dccpd_type) { 803 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
866 case DCCP_PKT_ACK: 804 hcrx->ccid3hcrx_s = payload;
867 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 805 /*
868 return; 806 * Not necessary to update ccid3hcrx_bytes_recv here,
869 case DCCP_PKT_DATAACK: 807 * since X_recv = 0 for the first feedback packet (cf.
870 if (opt_recv->dccpor_timestamp_echo == 0) 808 * RFC 3448, 6.3) -- gerrit
871 break; 809 */
872 r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; 810 }
873 rtt_prev = hcrx->ccid3hcrx_rtt; 811 goto update_records;
874 r_sample = dccp_sample_rtt(sk, 10 * r_sample); 812 }
875 813
876 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 814 if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
877 hcrx->ccid3hcrx_rtt = r_sample; 815 return; /* done receiving */
878 else
879 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
880 r_sample / 10;
881 816
882 if (rtt_prev != hcrx->ccid3hcrx_rtt) 817 if (is_data_packet) {
883 ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", 818 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
884 dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, 819 /*
885 opt_recv->dccpor_elapsed_time); 820 * Update moving-average of s and the sum of received payload bytes
886 break; 821 */
887 case DCCP_PKT_DATA: 822 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
888 break; 823 hcrx->ccid3hcrx_bytes_recv += payload;
889 default: /* We're not interested in other packet types, move along */
890 return;
891 } 824 }
892 825
893 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, 826 /*
894 skb, GFP_ATOMIC); 827 * Handle pending losses and otherwise check for new loss
895 if (unlikely(packet == NULL)) { 828 */
896 DCCP_WARN("%s(%p), Not enough mem to add rx packet " 829 if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
897 "to history, consider it lost!\n", dccp_role(sk), sk); 830 tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
898 return; 831 &hcrx->ccid3hcrx_li_hist,
832 skb, ndp, ccid3_first_li, sk) ) {
833 do_feedback = CCID3_FBACK_PARAM_CHANGE;
834 goto done_receiving;
899 } 835 }
900 836
901 loss = ccid3_hc_rx_detect_loss(sk, packet); 837 if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
838 goto update_records;
902 839
903 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) 840 /*
904 return; 841 * Handle data packets: RTT sampling and monitoring p
905 842 */
906 payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; 843 if (unlikely(!is_data_packet))
907 ccid3_hc_rx_update_s(hcrx, payload_size); 844 goto update_records;
908 845
909 switch (hcrx->ccid3hcrx_state) { 846 if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
910 case TFRC_RSTATE_NO_DATA: 847 const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
911 ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " 848 /*
912 "feedback\n", dccp_role(sk), sk, 849 * Empty loss history: no loss so far, hence p stays 0.
913 dccp_state_name(sk->sk_state), skb); 850 * Sample RTT values, since an RTT estimate is required for the
914 ccid3_hc_rx_send_feedback(sk); 851 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
915 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); 852 */
916 return; 853 if (sample != 0)
917 case TFRC_RSTATE_DATA: 854 hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
918 hcrx->ccid3hcrx_bytes_recv += payload_size;
919 if (loss)
920 break;
921 855
922 now = ktime_get_real(); 856 } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
923 if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - 857 /*
924 (s64)hcrx->ccid3hcrx_rtt) >= 0) { 858 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
925 hcrx->ccid3hcrx_tstamp_last_ack = now; 859 * has decreased (resp. p has increased), send feedback now.
926 ccid3_hc_rx_send_feedback(sk); 860 */
927 } 861 do_feedback = CCID3_FBACK_PARAM_CHANGE;
928 return;
929 case TFRC_RSTATE_TERM:
930 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
931 return;
932 } 862 }
933 863
934 /* Dealing with packet loss */ 864 /*
935 ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", 865 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
936 dccp_role(sk), sk, dccp_state_name(sk->sk_state)); 866 */
937 867 if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
938 p_prev = hcrx->ccid3hcrx_p; 868 do_feedback = CCID3_FBACK_PERIODIC;
939
940 /* Calculate loss event rate */
941 if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
942 u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
943 869
944 /* Scaling up by 1000000 as fixed decimal */ 870update_records:
945 if (i_mean != 0) 871 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
946 hcrx->ccid3hcrx_p = 1000000 / i_mean;
947 } else
948 DCCP_BUG("empty loss history");
949 872
950 if (hcrx->ccid3hcrx_p > p_prev) { 873done_receiving:
951 ccid3_hc_rx_send_feedback(sk); 874 if (do_feedback)
952 return; 875 ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
953 }
954} 876}
955 877
956static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) 878static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
957{ 879{
958 struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); 880 struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
959 881
960 ccid3_pr_debug("entry\n");
961
962 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 882 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
963 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); 883 tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
964 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); 884 return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
965 hcrx->ccid3hcrx_tstamp_last_feedback =
966 hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
967 return 0;
968} 885}
969 886
970static void ccid3_hc_rx_exit(struct sock *sk) 887static void ccid3_hc_rx_exit(struct sock *sk)
@@ -973,11 +890,8 @@ static void ccid3_hc_rx_exit(struct sock *sk)
973 890
974 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); 891 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
975 892
976 /* Empty packet history */ 893 tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
977 dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); 894 tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
978
979 /* Empty loss interval history */
980 dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
981} 895}
982 896
983static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 897static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
@@ -998,6 +912,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
998 u32 __user *optval, int __user *optlen) 912 u32 __user *optval, int __user *optlen)
999{ 913{
1000 const struct ccid3_hc_rx_sock *hcrx; 914 const struct ccid3_hc_rx_sock *hcrx;
915 struct tfrc_rx_info rx_info;
1001 const void *val; 916 const void *val;
1002 917
1003 /* Listen socks doesn't have a private CCID block */ 918 /* Listen socks doesn't have a private CCID block */
@@ -1007,10 +922,14 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1007 hcrx = ccid3_hc_rx_sk(sk); 922 hcrx = ccid3_hc_rx_sk(sk);
1008 switch (optname) { 923 switch (optname) {
1009 case DCCP_SOCKOPT_CCID_RX_INFO: 924 case DCCP_SOCKOPT_CCID_RX_INFO:
1010 if (len < sizeof(hcrx->ccid3hcrx_tfrc)) 925 if (len < sizeof(rx_info))
1011 return -EINVAL; 926 return -EINVAL;
1012 len = sizeof(hcrx->ccid3hcrx_tfrc); 927 rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
1013 val = &hcrx->ccid3hcrx_tfrc; 928 rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt;
929 rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
930 scaled_div(1, hcrx->ccid3hcrx_pinv);
931 len = sizeof(rx_info);
932 val = &rx_info;
1014 break; 933 break;
1015 default: 934 default:
1016 return -ENOPROTOOPT; 935 return -ENOPROTOOPT;
@@ -1024,7 +943,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1024 943
1025static struct ccid_operations ccid3 = { 944static struct ccid_operations ccid3 = {
1026 .ccid_id = DCCPC_CCID3, 945 .ccid_id = DCCPC_CCID3,
1027 .ccid_name = "ccid3", 946 .ccid_name = "TCP-Friendly Rate Control",
1028 .ccid_owner = THIS_MODULE, 947 .ccid_owner = THIS_MODULE,
1029 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), 948 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
1030 .ccid_hc_tx_init = ccid3_hc_tx_init, 949 .ccid_hc_tx_init = ccid3_hc_tx_init,
@@ -1051,44 +970,13 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1051 970
1052static __init int ccid3_module_init(void) 971static __init int ccid3_module_init(void)
1053{ 972{
1054 int rc = -ENOBUFS; 973 return ccid_register(&ccid3);
1055
1056 ccid3_rx_hist = dccp_rx_hist_new("ccid3");
1057 if (ccid3_rx_hist == NULL)
1058 goto out;
1059
1060 ccid3_tx_hist = dccp_tx_hist_new("ccid3");
1061 if (ccid3_tx_hist == NULL)
1062 goto out_free_rx;
1063
1064 rc = ccid_register(&ccid3);
1065 if (rc != 0)
1066 goto out_free_tx;
1067out:
1068 return rc;
1069
1070out_free_tx:
1071 dccp_tx_hist_delete(ccid3_tx_hist);
1072 ccid3_tx_hist = NULL;
1073out_free_rx:
1074 dccp_rx_hist_delete(ccid3_rx_hist);
1075 ccid3_rx_hist = NULL;
1076 goto out;
1077} 974}
1078module_init(ccid3_module_init); 975module_init(ccid3_module_init);
1079 976
1080static __exit void ccid3_module_exit(void) 977static __exit void ccid3_module_exit(void)
1081{ 978{
1082 ccid_unregister(&ccid3); 979 ccid_unregister(&ccid3);
1083
1084 if (ccid3_tx_hist != NULL) {
1085 dccp_tx_hist_delete(ccid3_tx_hist);
1086 ccid3_tx_hist = NULL;
1087 }
1088 if (ccid3_rx_hist != NULL) {
1089 dccp_rx_hist_delete(ccid3_rx_hist);
1090 ccid3_rx_hist = NULL;
1091 }
1092} 980}
1093module_exit(ccid3_module_exit); 981module_exit(ccid3_module_exit);
1094 982
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0cdc982cfe47..49ca32bd7e79 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
1/* 1/*
2 * net/dccp/ccids/ccid3.h 2 * net/dccp/ccids/ccid3.h
3 * 3 *
4 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. 4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
5 * 6 *
6 * An implementation of the DCCP protocol 7 * An implementation of the DCCP protocol
7 * 8 *
@@ -40,6 +41,7 @@
40#include <linux/list.h> 41#include <linux/list.h>
41#include <linux/types.h> 42#include <linux/types.h>
42#include <linux/tfrc.h> 43#include <linux/tfrc.h>
44#include "lib/tfrc.h"
43#include "../ccid.h" 45#include "../ccid.h"
44 46
45/* Two seconds as per RFC 3448 4.2 */ 47/* Two seconds as per RFC 3448 4.2 */
@@ -88,7 +90,6 @@ enum ccid3_hc_tx_states {
88 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet 90 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
89 * with last_win_count value sent 91 * with last_win_count value sent
90 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer 92 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
91 * @ccid3hctx_idle - Flag indicating that sender is idling
92 * @ccid3hctx_t_ld - Time last doubled during slow start 93 * @ccid3hctx_t_ld - Time last doubled during slow start
93 * @ccid3hctx_t_nom - Nominal send time of next packet 94 * @ccid3hctx_t_nom - Nominal send time of next packet
94 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs 95 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
@@ -107,13 +108,12 @@ struct ccid3_hc_tx_sock {
107 u16 ccid3hctx_s; 108 u16 ccid3hctx_s;
108 enum ccid3_hc_tx_states ccid3hctx_state:8; 109 enum ccid3_hc_tx_states ccid3hctx_state:8;
109 u8 ccid3hctx_last_win_count; 110 u8 ccid3hctx_last_win_count;
110 u8 ccid3hctx_idle;
111 ktime_t ccid3hctx_t_last_win_count; 111 ktime_t ccid3hctx_t_last_win_count;
112 struct timer_list ccid3hctx_no_feedback_timer; 112 struct timer_list ccid3hctx_no_feedback_timer;
113 ktime_t ccid3hctx_t_ld; 113 ktime_t ccid3hctx_t_ld;
114 ktime_t ccid3hctx_t_nom; 114 ktime_t ccid3hctx_t_nom;
115 u32 ccid3hctx_delta; 115 u32 ccid3hctx_delta;
116 struct list_head ccid3hctx_hist; 116 struct tfrc_tx_hist_entry *ccid3hctx_hist;
117 struct ccid3_options_received ccid3hctx_options_received; 117 struct ccid3_options_received ccid3hctx_options_received;
118}; 118};
119 119
@@ -135,37 +135,30 @@ enum ccid3_hc_rx_states {
135 * 135 *
136 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) 136 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
137 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) 137 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
138 * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) 138 * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4)
139 * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number 139 * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
140 * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal 140 * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states
141 * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
142 * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
143 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes 141 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
142 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
143 * @ccid3hcrx_rtt - Receiver estimate of RTT
144 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent 144 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
145 * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent 145 * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
146 * @ccid3hcrx_hist - Packet history 146 * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling)
147 * @ccid3hcrx_li_hist - Loss Interval History 147 * @ccid3hcrx_li_hist - Loss Interval database
148 * @ccid3hcrx_s - Received packet size in bytes 148 * @ccid3hcrx_s - Received packet size in bytes
149 * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) 149 * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
150 * @ccid3hcrx_elapsed_time - Time since packet reception
151 */ 150 */
152struct ccid3_hc_rx_sock { 151struct ccid3_hc_rx_sock {
153 struct tfrc_rx_info ccid3hcrx_tfrc; 152 u8 ccid3hcrx_last_counter:4;
154#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
155#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
156#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
157 u64 ccid3hcrx_seqno_nonloss:48,
158 ccid3hcrx_ccval_nonloss:4,
159 ccid3hcrx_ccval_last_counter:4;
160 enum ccid3_hc_rx_states ccid3hcrx_state:8; 153 enum ccid3_hc_rx_states ccid3hcrx_state:8;
161 u32 ccid3hcrx_bytes_recv; 154 u32 ccid3hcrx_bytes_recv;
155 u32 ccid3hcrx_x_recv;
156 u32 ccid3hcrx_rtt;
162 ktime_t ccid3hcrx_tstamp_last_feedback; 157 ktime_t ccid3hcrx_tstamp_last_feedback;
163 ktime_t ccid3hcrx_tstamp_last_ack; 158 struct tfrc_rx_hist ccid3hcrx_hist;
164 struct list_head ccid3hcrx_hist; 159 struct tfrc_loss_hist ccid3hcrx_li_hist;
165 struct list_head ccid3hcrx_li_hist;
166 u16 ccid3hcrx_s; 160 u16 ccid3hcrx_s;
167 u32 ccid3hcrx_pinv; 161#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean
168 u32 ccid3hcrx_elapsed_time;
169}; 162};
170 163
171static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) 164static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
index 5f940a6cbaca..68c93e3d89dc 100644
--- a/net/dccp/ccids/lib/Makefile
+++ b/net/dccp/ccids/lib/Makefile
@@ -1,3 +1,3 @@
1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o 1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
2 2
3dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o 3dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 40ad428a27f5..849e181e698f 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dccp/ccids/lib/loss_interval.c 2 * net/dccp/ccids/lib/loss_interval.c
3 * 3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 5 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> 6 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -10,285 +11,176 @@
10 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version. 12 * (at your option) any later version.
12 */ 13 */
13
14#include <linux/module.h>
15#include <net/sock.h> 14#include <net/sock.h>
16#include "../../dccp.h"
17#include "loss_interval.h"
18#include "packet_history.h"
19#include "tfrc.h" 15#include "tfrc.h"
20 16
21#define DCCP_LI_HIST_IVAL_F_LENGTH 8 17static struct kmem_cache *tfrc_lh_slab __read_mostly;
22 18/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
23struct dccp_li_hist_entry { 19static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
24 struct list_head dccplih_node;
25 u64 dccplih_seqno:48,
26 dccplih_win_count:4;
27 u32 dccplih_interval;
28};
29 20
30static struct kmem_cache *dccp_li_cachep __read_mostly; 21/* implements LIFO semantics on the array */
31 22static inline u8 LIH_INDEX(const u8 ctr)
32static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
33{ 23{
34 return kmem_cache_alloc(dccp_li_cachep, prio); 24 return (LIH_SIZE - 1 - (ctr % LIH_SIZE));
35} 25}
36 26
37static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) 27/* the `counter' index always points at the next entry to be populated */
28static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh)
38{ 29{
39 if (entry != NULL) 30 return lh->counter ? lh->ring[LIH_INDEX(lh->counter - 1)] : NULL;
40 kmem_cache_free(dccp_li_cachep, entry);
41} 31}
42 32
43void dccp_li_hist_purge(struct list_head *list) 33/* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */
34static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i)
44{ 35{
45 struct dccp_li_hist_entry *entry, *next; 36 BUG_ON(i >= lh->counter);
46 37 return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length;
47 list_for_each_entry_safe(entry, next, list, dccplih_node) {
48 list_del_init(&entry->dccplih_node);
49 kmem_cache_free(dccp_li_cachep, entry);
50 }
51} 38}
52 39
53EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
54
55/* Weights used to calculate loss event rate */
56/* 40/*
57 * These are integers as per section 8 of RFC3448. We can then divide by 4 * 41 * On-demand allocation and de-allocation of entries
58 * when we use it.
59 */ 42 */
60static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { 43static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh)
61 4, 4, 4, 4, 3, 2, 1, 1,
62};
63
64u32 dccp_li_hist_calc_i_mean(struct list_head *list)
65{ 44{
66 struct dccp_li_hist_entry *li_entry, *li_next; 45 if (lh->ring[LIH_INDEX(lh->counter)] == NULL)
67 int i = 0; 46 lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab,
68 u32 i_tot; 47 GFP_ATOMIC);
69 u32 i_tot0 = 0; 48 return lh->ring[LIH_INDEX(lh->counter)];
70 u32 i_tot1 = 0;
71 u32 w_tot = 0;
72
73 list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
74 if (li_entry->dccplih_interval != ~0U) {
75 i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
76 w_tot += dccp_li_hist_w[i];
77 if (i != 0)
78 i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
79 }
80
81
82 if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
83 break;
84 }
85
86 if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
87 return 0;
88
89 i_tot = max(i_tot0, i_tot1);
90
91 if (!w_tot) {
92 DCCP_WARN("w_tot = 0\n");
93 return 1;
94 }
95
96 return i_tot / w_tot;
97} 49}
98 50
99EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); 51void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
100
101static int dccp_li_hist_interval_new(struct list_head *list,
102 const u64 seq_loss, const u8 win_loss)
103{ 52{
104 struct dccp_li_hist_entry *entry; 53 if (!tfrc_lh_is_initialised(lh))
105 int i; 54 return;
106 55
107 for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { 56 for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++)
108 entry = dccp_li_hist_entry_new(GFP_ATOMIC); 57 if (lh->ring[LIH_INDEX(lh->counter)] != NULL) {
109 if (entry == NULL) { 58 kmem_cache_free(tfrc_lh_slab,
110 dccp_li_hist_purge(list); 59 lh->ring[LIH_INDEX(lh->counter)]);
111 DCCP_BUG("loss interval list entry is NULL"); 60 lh->ring[LIH_INDEX(lh->counter)] = NULL;
112 return 0;
113 } 61 }
114 entry->dccplih_interval = ~0;
115 list_add(&entry->dccplih_node, list);
116 }
117
118 entry->dccplih_seqno = seq_loss;
119 entry->dccplih_win_count = win_loss;
120 return 1;
121} 62}
63EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
122 64
123/* calculate first loss interval 65static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
124 *
125 * returns estimated loss interval in usecs */
126static u32 dccp_li_calc_first_li(struct sock *sk,
127 struct list_head *hist_list,
128 ktime_t last_feedback,
129 u16 s, u32 bytes_recv,
130 u32 previous_x_recv)
131{ 66{
132 struct dccp_rx_hist_entry *entry, *next, *tail = NULL; 67 u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
133 u32 x_recv, p; 68 int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
134 suseconds_t rtt, delta;
135 ktime_t tstamp = ktime_set(0, 0);
136 int interval = 0;
137 int win_count = 0;
138 int step = 0;
139 u64 fval;
140 69
141 list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { 70 for (i=0; i <= k; i++) {
142 if (dccp_rx_hist_entry_data_packet(entry)) { 71 i_i = tfrc_lh_get_interval(lh, i);
143 tail = entry;
144 72
145 switch (step) { 73 if (i < k) {
146 case 0: 74 i_tot0 += i_i * tfrc_lh_weights[i];
147 tstamp = entry->dccphrx_tstamp; 75 w_tot += tfrc_lh_weights[i];
148 win_count = entry->dccphrx_ccval;
149 step = 1;
150 break;
151 case 1:
152 interval = win_count - entry->dccphrx_ccval;
153 if (interval < 0)
154 interval += TFRC_WIN_COUNT_LIMIT;
155 if (interval > 4)
156 goto found;
157 break;
158 }
159 } 76 }
77 if (i > 0)
78 i_tot1 += i_i * tfrc_lh_weights[i-1];
160 } 79 }
161 80
162 if (unlikely(step == 0)) { 81 BUG_ON(w_tot == 0);
163 DCCP_WARN("%s(%p), packet history has no data packets!\n", 82 lh->i_mean = max(i_tot0, i_tot1) / w_tot;
164 dccp_role(sk), sk); 83}
165 return ~0;
166 }
167
168 if (unlikely(interval == 0)) {
169 DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
170 "Defaulting to 1\n", dccp_role(sk), sk);
171 interval = 1;
172 }
173found:
174 if (!tail) {
175 DCCP_CRIT("tail is null\n");
176 return ~0;
177 }
178
179 delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp);
180 DCCP_BUG_ON(delta < 0);
181 84
182 rtt = delta * 4 / interval; 85/**
183 dccp_pr_debug("%s(%p), approximated RTT to %dus\n", 86 * tfrc_lh_update_i_mean - Update the `open' loss interval I_0
184 dccp_role(sk), sk, (int)rtt); 87 * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
88 */
89u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
90{
91 struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
92 u32 old_i_mean = lh->i_mean;
93 s64 length;
185 94
186 /* 95 if (cur == NULL) /* not initialised */
187 * Determine the length of the first loss interval via inverse lookup. 96 return 0;
188 * Assume that X_recv can be computed by the throughput equation
189 * s
190 * X_recv = --------
191 * R * fval
192 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
193 */
194 if (rtt == 0) { /* would result in divide-by-zero */
195 DCCP_WARN("RTT==0\n");
196 return ~0;
197 }
198 97
199 delta = ktime_us_delta(ktime_get_real(), last_feedback); 98 length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq);
200 DCCP_BUG_ON(delta <= 0);
201 99
202 x_recv = scaled_div32(bytes_recv, delta); 100 if (length - cur->li_length <= 0) /* duplicate or reordered */
203 if (x_recv == 0) { /* would also trigger divide-by-zero */ 101 return 0;
204 DCCP_WARN("X_recv==0\n");
205 if (previous_x_recv == 0) {
206 DCCP_BUG("stored value of X_recv is zero");
207 return ~0;
208 }
209 x_recv = previous_x_recv;
210 }
211 102
212 fval = scaled_div(s, rtt); 103 if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
213 fval = scaled_div32(fval, x_recv); 104 /*
214 p = tfrc_calc_x_reverse_lookup(fval); 105 * Implements RFC 4342, 10.2:
106 * If a packet S (skb) exists whose seqno comes `after' the one
107 * starting the current loss interval (cur) and if the modulo-16
108 * distance from C(cur) to C(S) is greater than 4, consider all
109 * subsequent packets as belonging to a new loss interval. This
110 * test is necessary since CCVal may wrap between intervals.
111 */
112 cur->li_is_closed = 1;
113
114 if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
115 return 0;
215 116
216 dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " 117 cur->li_length = length;
217 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); 118 tfrc_lh_calc_i_mean(lh);
218 119
219 if (p == 0) 120 return (lh->i_mean < old_i_mean);
220 return ~0;
221 else
222 return 1000000 / p;
223} 121}
122EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
224 123
225void dccp_li_update_li(struct sock *sk, 124/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
226 struct list_head *li_hist_list, 125static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
227 struct list_head *hist_list, 126 struct tfrc_rx_hist_entry *new_loss)
228 ktime_t last_feedback, u16 s, u32 bytes_recv,
229 u32 previous_x_recv, u64 seq_loss, u8 win_loss)
230{ 127{
231 struct dccp_li_hist_entry *head; 128 return dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 &&
232 u64 seq_temp; 129 (cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4);
233 130}
234 if (list_empty(li_hist_list)) {
235 if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
236 win_loss))
237 return;
238
239 head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
240 dccplih_node);
241 head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
242 last_feedback,
243 s, bytes_recv,
244 previous_x_recv);
245 } else {
246 struct dccp_li_hist_entry *entry;
247 struct list_head *tail;
248 131
249 head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, 132/** tfrc_lh_interval_add - Insert new record into the Loss Interval database
250 dccplih_node); 133 * @lh: Loss Interval database
251 /* FIXME win count check removed as was wrong */ 134 * @rh: Receive history containing a fresh loss event
252 /* should make this check with receive history */ 135 * @calc_first_li: Caller-dependent routine to compute length of first interval
253 /* and compare there as per section 10.2 of RFC4342 */ 136 * @sk: Used by @calc_first_li in caller-specific way (subtyping)
137 * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
138 */
139int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
140 u32 (*calc_first_li)(struct sock *), struct sock *sk)
141{
142 struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
254 143
255 /* new loss event detected */ 144 if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
256 /* calculate last interval length */ 145 return 0;
257 seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
258 entry = dccp_li_hist_entry_new(GFP_ATOMIC);
259 146
260 if (entry == NULL) { 147 new = tfrc_lh_demand_next(lh);
261 DCCP_BUG("out of memory - can not allocate entry"); 148 if (unlikely(new == NULL)) {
262 return; 149 DCCP_CRIT("Cannot allocate/add loss record.");
263 } 150 return 0;
151 }
264 152
265 list_add(&entry->dccplih_node, li_hist_list); 153 new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
154 new->li_ccval = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval;
155 new->li_is_closed = 0;
266 156
267 tail = li_hist_list->prev; 157 if (++lh->counter == 1)
268 list_del(tail); 158 lh->i_mean = new->li_length = (*calc_first_li)(sk);
269 kmem_cache_free(dccp_li_cachep, tail); 159 else {
160 cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
161 new->li_length = dccp_delta_seqno(new->li_seqno,
162 tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno);
163 if (lh->counter > (2*LIH_SIZE))
164 lh->counter -= LIH_SIZE;
270 165
271 /* Create the newest interval */ 166 tfrc_lh_calc_i_mean(lh);
272 entry->dccplih_seqno = seq_loss;
273 entry->dccplih_interval = seq_temp;
274 entry->dccplih_win_count = win_loss;
275 } 167 }
168 return 1;
276} 169}
170EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
277 171
278EXPORT_SYMBOL_GPL(dccp_li_update_li); 172int __init tfrc_li_init(void)
279
280static __init int dccp_li_init(void)
281{ 173{
282 dccp_li_cachep = kmem_cache_create("dccp_li_hist", 174 tfrc_lh_slab = kmem_cache_create("tfrc_li_hist",
283 sizeof(struct dccp_li_hist_entry), 175 sizeof(struct tfrc_loss_interval), 0,
284 0, SLAB_HWCACHE_ALIGN, NULL); 176 SLAB_HWCACHE_ALIGN, NULL);
285 return dccp_li_cachep == NULL ? -ENOBUFS : 0; 177 return tfrc_lh_slab == NULL ? -ENOBUFS : 0;
286} 178}
287 179
288static __exit void dccp_li_exit(void) 180void tfrc_li_exit(void)
289{ 181{
290 kmem_cache_destroy(dccp_li_cachep); 182 if (tfrc_lh_slab != NULL) {
183 kmem_cache_destroy(tfrc_lh_slab);
184 tfrc_lh_slab = NULL;
185 }
291} 186}
292
293module_init(dccp_li_init);
294module_exit(dccp_li_exit);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 27bee92dae13..246018a3b269 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -3,6 +3,7 @@
3/* 3/*
4 * net/dccp/ccids/lib/loss_interval.h 4 * net/dccp/ccids/lib/loss_interval.h
5 * 5 *
6 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
6 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 7 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> 8 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 9 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -12,18 +13,63 @@
12 * Software Foundation; either version 2 of the License, or (at your option) 13 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version. 14 * any later version.
14 */ 15 */
15
16#include <linux/ktime.h> 16#include <linux/ktime.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/slab.h>
19
20/*
21 * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than
22 * NINTERVAL, since the `open' interval I_0 is always stored as the first entry.
23 */
24#define NINTERVAL 8
25#define LIH_SIZE (NINTERVAL + 1)
26
27/**
28 * tfrc_loss_interval - Loss history record for TFRC-based protocols
29 * @li_seqno: Highest received seqno before the start of loss
30 * @li_ccval: The CCVal belonging to @li_seqno
31 * @li_is_closed: Whether @li_seqno is older than 1 RTT
32 * @li_length: Loss interval sequence length
33 */
34struct tfrc_loss_interval {
35 u64 li_seqno:48,
36 li_ccval:4,
37 li_is_closed:1;
38 u32 li_length;
39};
40
41/**
42 * tfrc_loss_hist - Loss record database
43 * @ring: Circular queue managed in LIFO manner
44 * @counter: Current count of entries (can be more than %LIH_SIZE)
45 * @i_mean: Current Average Loss Interval [RFC 3448, 5.4]
46 */
47struct tfrc_loss_hist {
48 struct tfrc_loss_interval *ring[LIH_SIZE];
49 u8 counter;
50 u32 i_mean;
51};
52
53static inline void tfrc_lh_init(struct tfrc_loss_hist *lh)
54{
55 memset(lh, 0, sizeof(struct tfrc_loss_hist));
56}
57
58static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh)
59{
60 return lh->counter > 0;
61}
62
63static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
64{
65 return min(lh->counter, (u8)LIH_SIZE);
66}
18 67
19extern void dccp_li_hist_purge(struct list_head *list); 68struct tfrc_rx_hist;
20 69
21extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); 70extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
71 u32 (*first_li)(struct sock *), struct sock *);
72extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
73extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
22 74
23extern void dccp_li_update_li(struct sock *sk,
24 struct list_head *li_hist_list,
25 struct list_head *hist_list,
26 ktime_t last_feedback, u16 s,
27 u32 bytes_recv, u32 previous_x_recv,
28 u64 seq_loss, u8 win_loss);
29#endif /* _DCCP_LI_HIST_ */ 75#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 34c4f6047724..20af1a693427 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * net/dccp/packet_history.c 2 * net/dccp/packet_history.c
3 * 3 *
4 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. 4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
5 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * 6 *
6 * An implementation of the DCCP protocol 7 * An implementation of the DCCP protocol
7 * 8 *
@@ -34,267 +35,465 @@
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */ 36 */
36 37
37#include <linux/module.h>
38#include <linux/string.h> 38#include <linux/string.h>
39#include <linux/slab.h>
39#include "packet_history.h" 40#include "packet_history.h"
41#include "../../dccp.h"
42
43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
40 54
41/* 55/*
42 * Transmitter History Routines 56 * Transmitter History Routines
43 */ 57 */
44struct dccp_tx_hist *dccp_tx_hist_new(const char *name) 58static struct kmem_cache *tfrc_tx_hist_slab;
59
60int __init tfrc_tx_packet_history_init(void)
45{ 61{
46 struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); 62 tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
47 static const char dccp_tx_hist_mask[] = "tx_hist_%s"; 63 sizeof(struct tfrc_tx_hist_entry),
48 char *slab_name; 64 0, SLAB_HWCACHE_ALIGN, NULL);
49 65 return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
50 if (hist == NULL)
51 goto out;
52
53 slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
54 GFP_ATOMIC);
55 if (slab_name == NULL)
56 goto out_free_hist;
57
58 sprintf(slab_name, dccp_tx_hist_mask, name);
59 hist->dccptxh_slab = kmem_cache_create(slab_name,
60 sizeof(struct dccp_tx_hist_entry),
61 0, SLAB_HWCACHE_ALIGN,
62 NULL);
63 if (hist->dccptxh_slab == NULL)
64 goto out_free_slab_name;
65out:
66 return hist;
67out_free_slab_name:
68 kfree(slab_name);
69out_free_hist:
70 kfree(hist);
71 hist = NULL;
72 goto out;
73} 66}
74 67
75EXPORT_SYMBOL_GPL(dccp_tx_hist_new); 68void tfrc_tx_packet_history_exit(void)
76
77void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
78{ 69{
79 const char* name = kmem_cache_name(hist->dccptxh_slab); 70 if (tfrc_tx_hist_slab != NULL) {
80 71 kmem_cache_destroy(tfrc_tx_hist_slab);
81 kmem_cache_destroy(hist->dccptxh_slab); 72 tfrc_tx_hist_slab = NULL;
82 kfree(name); 73 }
83 kfree(hist);
84} 74}
85 75
86EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); 76static struct tfrc_tx_hist_entry *
87 77 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
88struct dccp_tx_hist_entry *
89 dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
90{ 78{
91 struct dccp_tx_hist_entry *packet = NULL, *entry; 79 while (head != NULL && head->seqno != seqno)
92 80 head = head->next;
93 list_for_each_entry(entry, list, dccphtx_node)
94 if (entry->dccphtx_seqno == seq) {
95 packet = entry;
96 break;
97 }
98 81
99 return packet; 82 return head;
100} 83}
101 84
102EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); 85int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
86{
87 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
88
89 if (entry == NULL)
90 return -ENOBUFS;
91 entry->seqno = seqno;
92 entry->stamp = ktime_get_real();
93 entry->next = *headp;
94 *headp = entry;
95 return 0;
96}
97EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
103 98
104void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) 99void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
105{ 100{
106 struct dccp_tx_hist_entry *entry, *next; 101 struct tfrc_tx_hist_entry *head = *headp;
102
103 while (head != NULL) {
104 struct tfrc_tx_hist_entry *next = head->next;
107 105
108 list_for_each_entry_safe(entry, next, list, dccphtx_node) { 106 kmem_cache_free(tfrc_tx_hist_slab, head);
109 list_del_init(&entry->dccphtx_node); 107 head = next;
110 dccp_tx_hist_entry_delete(hist, entry);
111 } 108 }
112}
113 109
114EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); 110 *headp = NULL;
111}
112EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
115 113
116void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, 114u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
117 struct list_head *list, 115 const ktime_t now)
118 struct dccp_tx_hist_entry *packet)
119{ 116{
120 struct dccp_tx_hist_entry *next; 117 u32 rtt = 0;
118 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
121 119
122 list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { 120 if (packet != NULL) {
123 list_del_init(&packet->dccphtx_node); 121 rtt = ktime_us_delta(now, packet->stamp);
124 dccp_tx_hist_entry_delete(hist, packet); 122 /*
123 * Garbage-collect older (irrelevant) entries:
124 */
125 tfrc_tx_hist_purge(&packet->next);
125 } 126 }
127
128 return rtt;
126} 129}
130EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
127 131
128EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
129 132
130/* 133/*
131 * Receiver History Routines 134 * Receiver History Routines
132 */ 135 */
133struct dccp_rx_hist *dccp_rx_hist_new(const char *name) 136static struct kmem_cache *tfrc_rx_hist_slab;
137
138int __init tfrc_rx_packet_history_init(void)
134{ 139{
135 struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); 140 tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
136 static const char dccp_rx_hist_mask[] = "rx_hist_%s"; 141 sizeof(struct tfrc_rx_hist_entry),
137 char *slab_name; 142 0, SLAB_HWCACHE_ALIGN, NULL);
138 143 return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
139 if (hist == NULL)
140 goto out;
141
142 slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
143 GFP_ATOMIC);
144 if (slab_name == NULL)
145 goto out_free_hist;
146
147 sprintf(slab_name, dccp_rx_hist_mask, name);
148 hist->dccprxh_slab = kmem_cache_create(slab_name,
149 sizeof(struct dccp_rx_hist_entry),
150 0, SLAB_HWCACHE_ALIGN,
151 NULL);
152 if (hist->dccprxh_slab == NULL)
153 goto out_free_slab_name;
154out:
155 return hist;
156out_free_slab_name:
157 kfree(slab_name);
158out_free_hist:
159 kfree(hist);
160 hist = NULL;
161 goto out;
162} 144}
163 145
164EXPORT_SYMBOL_GPL(dccp_rx_hist_new); 146void tfrc_rx_packet_history_exit(void)
147{
148 if (tfrc_rx_hist_slab != NULL) {
149 kmem_cache_destroy(tfrc_rx_hist_slab);
150 tfrc_rx_hist_slab = NULL;
151 }
152}
165 153
166void dccp_rx_hist_delete(struct dccp_rx_hist *hist) 154static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
155 const struct sk_buff *skb,
156 const u32 ndp)
167{ 157{
168 const char* name = kmem_cache_name(hist->dccprxh_slab); 158 const struct dccp_hdr *dh = dccp_hdr(skb);
169 159
170 kmem_cache_destroy(hist->dccprxh_slab); 160 entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
171 kfree(name); 161 entry->tfrchrx_ccval = dh->dccph_ccval;
172 kfree(hist); 162 entry->tfrchrx_type = dh->dccph_type;
163 entry->tfrchrx_ndp = ndp;
164 entry->tfrchrx_tstamp = ktime_get_real();
173} 165}
174 166
175EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); 167void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
168 const struct sk_buff *skb,
169 const u32 ndp)
170{
171 struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
172
173 tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
174}
175EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
176 176
177int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, 177/* has the packet contained in skb been seen before? */
178 u8 *ccval) 178int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
179{ 179{
180 struct dccp_rx_hist_entry *packet = NULL, *entry; 180 const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
181 int i;
181 182
182 list_for_each_entry(entry, list, dccphrx_node) 183 if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
183 if (entry->dccphrx_seqno == seq) { 184 return 1;
184 packet = entry;
185 break;
186 }
187 185
188 if (packet) 186 for (i = 1; i <= h->loss_count; i++)
189 *ccval = packet->dccphrx_ccval; 187 if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
188 return 1;
190 189
191 return packet != NULL; 190 return 0;
192} 191}
192EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
193 193
194EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); 194static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
195struct dccp_rx_hist_entry *
196 dccp_rx_hist_find_data_packet(const struct list_head *list)
197{ 195{
198 struct dccp_rx_hist_entry *entry, *packet = NULL; 196 const u8 idx_a = tfrc_rx_hist_index(h, a),
199 197 idx_b = tfrc_rx_hist_index(h, b);
200 list_for_each_entry(entry, list, dccphrx_node) 198 struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
201 if (entry->dccphrx_type == DCCP_PKT_DATA ||
202 entry->dccphrx_type == DCCP_PKT_DATAACK) {
203 packet = entry;
204 break;
205 }
206 199
207 return packet; 200 h->ring[idx_a] = h->ring[idx_b];
201 h->ring[idx_b] = tmp;
208} 202}
209 203
210EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); 204/*
205 * Private helper functions for loss detection.
206 *
207 * In the descriptions, `Si' refers to the sequence number of entry number i,
208 * whose NDP count is `Ni' (lower case is used for variables).
209 * Note: All __after_loss functions expect that a test against duplicates has
210 * been performed already: the seqno of the skb must not be less than the
211 * seqno of loss_prev; and it must not equal that of any valid hist_entry.
212 */
213static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
214{
215 u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
216 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
217 s2 = DCCP_SKB_CB(skb)->dccpd_seq;
218 int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
219 d12 = dccp_delta_seqno(s1, s2), d2;
220
221 if (d12 > 0) { /* S1 < S2 */
222 h->loss_count = 2;
223 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
224 return;
225 }
226
227 /* S0 < S2 < S1 */
228 d2 = dccp_delta_seqno(s0, s2);
211 229
212void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, 230 if (d2 == 1 || n2 >= d2) { /* S2 is direct successor of S0 */
213 struct list_head *rx_list, 231 int d21 = -d12;
214 struct list_head *li_list, 232
215 struct dccp_rx_hist_entry *packet, 233 if (d21 == 1 || n1 >= d21) {
216 u64 nonloss_seqno) 234 /* hole is filled: S0, S2, and S1 are consecutive */
235 h->loss_count = 0;
236 h->loss_start = tfrc_rx_hist_index(h, 1);
237 } else
238 /* gap between S2 and S1: just update loss_prev */
239 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
240
241 } else { /* hole between S0 and S2 */
242 /*
243 * Reorder history to insert S2 between S0 and s1
244 */
245 tfrc_rx_hist_swap(h, 0, 3);
246 h->loss_start = tfrc_rx_hist_index(h, 3);
247 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
248 h->loss_count = 2;
249 }
250}
251
252/* return 1 if a new loss event has been identified */
253static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
217{ 254{
218 struct dccp_rx_hist_entry *entry, *next; 255 u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
219 u8 num_later = 0; 256 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
220 257 s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
221 list_add(&packet->dccphrx_node, rx_list); 258 s3 = DCCP_SKB_CB(skb)->dccpd_seq;
222 259 int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
223 num_later = TFRC_RECV_NUM_LATE_LOSS + 1; 260 d23 = dccp_delta_seqno(s2, s3), d13, d3, d31;
224 261
225 if (!list_empty(li_list)) { 262 if (d23 > 0) { /* S2 < S3 */
226 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { 263 h->loss_count = 3;
227 if (num_later == 0) { 264 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
228 if (after48(nonloss_seqno, 265 return 1;
229 entry->dccphrx_seqno)) { 266 }
230 list_del_init(&entry->dccphrx_node); 267
231 dccp_rx_hist_entry_delete(hist, entry); 268 /* S3 < S2 */
232 } 269 d13 = dccp_delta_seqno(s1, s3);
233 } else if (dccp_rx_hist_entry_data_packet(entry)) 270
234 --num_later; 271 if (d13 > 0) {
235 }
236 } else {
237 int step = 0;
238 u8 win_count = 0; /* Not needed, but lets shut up gcc */
239 int tmp;
240 /* 272 /*
241 * We have no loss interval history so we need at least one 273 * The sequence number order is S1, S3, S2
242 * rtt:s of data packets to approximate rtt. 274 * Reorder history to insert entry between S1 and S2
243 */ 275 */
244 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { 276 tfrc_rx_hist_swap(h, 2, 3);
245 if (num_later == 0) { 277 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
246 switch (step) { 278 h->loss_count = 3;
247 case 0: 279 return 1;
248 step = 1; 280 }
249 /* OK, find next data packet */ 281
250 num_later = 1; 282 /* S0 < S3 < S1 */
251 break; 283 d31 = -d13;
252 case 1: 284 d3 = dccp_delta_seqno(s0, s3);
253 step = 2; 285
254 /* OK, find next data packet */ 286 if (d3 == 1 || n3 >= d3) { /* S3 is a successor of S0 */
255 num_later = 1; 287
256 win_count = entry->dccphrx_ccval; 288 if (d31 == 1 || n1 >= d31) {
257 break; 289 /* hole between S0 and S1 filled by S3 */
258 case 2: 290 int d2 = dccp_delta_seqno(s1, s2),
259 tmp = win_count - entry->dccphrx_ccval; 291 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
260 if (tmp < 0) 292
261 tmp += TFRC_WIN_COUNT_LIMIT; 293 if (d2 == 1 || n2 >= d2) {
262 if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { 294 /* entire hole filled by S0, S3, S1, S2 */
263 /* 295 h->loss_start = tfrc_rx_hist_index(h, 2);
264 * We have found a packet older 296 h->loss_count = 0;
265 * than one rtt remove the rest 297 } else {
266 */ 298 /* gap remains between S1 and S2 */
267 step = 3; 299 h->loss_start = tfrc_rx_hist_index(h, 1);
268 } else /* OK, find next data packet */ 300 h->loss_count = 1;
269 num_later = 1; 301 }
270 break; 302
271 case 3: 303 } else /* gap exists between S3 and S1, loss_count stays at 2 */
272 list_del_init(&entry->dccphrx_node); 304 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);
273 dccp_rx_hist_entry_delete(hist, entry); 305
274 break; 306 return 0;
275 } 307 }
276 } else if (dccp_rx_hist_entry_data_packet(entry)) 308
277 --num_later; 309 /*
310 * The remaining case: S3 is not a successor of S0.
311 * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1
312 */
313 tfrc_rx_hist_swap(h, 0, 3);
314 h->loss_start = tfrc_rx_hist_index(h, 3);
315 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
316 h->loss_count = 3;
317
318 return 1;
319}
320
321/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */
322static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2)
323{
324 DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count);
325
326 return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno,
327 tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno);
328}
329
330/* recycle RX history records to continue loss detection if necessary */
331static void __three_after_loss(struct tfrc_rx_hist *h)
332{
333 /*
334 * The distance between S0 and S1 is always greater than 1 and the NDP
335 * count of S1 is smaller than this distance. Otherwise there would
336 * have been no loss. Hence it is only necessary to see whether there
337 * are further missing data packets between S1/S2 and S2/S3.
338 */
339 int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2),
340 d3 = tfrc_rx_hist_delta_seqno(h, 2, 3),
341 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
342 n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
343
344 if (d2 == 1 || n2 >= d2) { /* S2 is successor to S1 */
345
346 if (d3 == 1 || n3 >= d3) {
347 /* S3 is successor of S2: entire hole is filled */
348 h->loss_start = tfrc_rx_hist_index(h, 3);
349 h->loss_count = 0;
350 } else {
351 /* gap between S2 and S3 */
352 h->loss_start = tfrc_rx_hist_index(h, 2);
353 h->loss_count = 1;
278 } 354 }
355
356 } else { /* gap between S1 and S2 */
357 h->loss_start = tfrc_rx_hist_index(h, 1);
358 h->loss_count = 2;
279 } 359 }
280} 360}
281 361
282EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); 362/**
363 * tfrc_rx_handle_loss - Loss detection and further processing
364 * @h: The non-empty RX history object
365 * @lh: Loss Intervals database to update
366 * @skb: Currently received packet
367 * @ndp: The NDP count belonging to @skb
368 * @calc_first_li: Caller-dependent computation of first loss interval in @lh
369 * @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
370 * Chooses action according to pending loss, updates LI database when a new
371 * loss was detected, and does required post-processing. Returns 1 when caller
372 * should send feedback, 0 otherwise.
373 */
374int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
375 struct tfrc_loss_hist *lh,
376 struct sk_buff *skb, u32 ndp,
377 u32 (*calc_first_li)(struct sock *), struct sock *sk)
378{
379 int is_new_loss = 0;
283 380
284void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) 381 if (h->loss_count == 1) {
382 __one_after_loss(h, skb, ndp);
383 } else if (h->loss_count != 2) {
384 DCCP_BUG("invalid loss_count %d", h->loss_count);
385 } else if (__two_after_loss(h, skb, ndp)) {
386 /*
387 * Update Loss Interval database and recycle RX records
388 */
389 is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
390 __three_after_loss(h);
391 }
392 return is_new_loss;
393}
394EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
395
396int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
285{ 397{
286 struct dccp_rx_hist_entry *entry, *next; 398 int i;
399
400 for (i = 0; i <= TFRC_NDUPACK; i++) {
401 h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
402 if (h->ring[i] == NULL)
403 goto out_free;
404 }
405
406 h->loss_count = h->loss_start = 0;
407 return 0;
287 408
288 list_for_each_entry_safe(entry, next, list, dccphrx_node) { 409out_free:
289 list_del_init(&entry->dccphrx_node); 410 while (i-- != 0) {
290 kmem_cache_free(hist->dccprxh_slab, entry); 411 kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
412 h->ring[i] = NULL;
291 } 413 }
414 return -ENOBUFS;
292} 415}
416EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
417
418void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
419{
420 int i;
293 421
294EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); 422 for (i = 0; i <= TFRC_NDUPACK; ++i)
423 if (h->ring[i] != NULL) {
424 kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
425 h->ring[i] = NULL;
426 }
427}
428EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
295 429
430/**
431 * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
432 */
433static inline struct tfrc_rx_hist_entry *
434 tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
435{
436 return h->ring[0];
437}
296 438
297MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " 439/**
298 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); 440 * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
299MODULE_DESCRIPTION("DCCP TFRC library"); 441 */
300MODULE_LICENSE("GPL"); 442static inline struct tfrc_rx_hist_entry *
443 tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
444{
445 return h->ring[h->rtt_sample_prev];
446}
447
448/**
449 * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
450 * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
451 * to compute a sample with given data - calling function should check this.
452 */
453u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
454{
455 u32 sample = 0,
456 delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
457 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
458
459 if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
460 if (h->rtt_sample_prev == 2) { /* previous candidate stored */
461 sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
462 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
463 if (sample)
464 sample = 4 / sample *
465 ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
466 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
467 else /*
468 * FIXME: This condition is in principle not
469 * possible but occurs when CCID is used for
470 * two-way data traffic. I have tried to trace
471 * it, but the cause does not seem to be here.
472 */
473 DCCP_BUG("please report to dccp@vger.kernel.org"
474 " => prev = %u, last = %u",
475 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
476 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
477 } else if (delta_v < 1) {
478 h->rtt_sample_prev = 1;
479 goto keep_ref_for_next_time;
480 }
481
482 } else if (delta_v == 4) /* optimal match */
483 sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
484 else { /* suboptimal match */
485 h->rtt_sample_prev = 2;
486 goto keep_ref_for_next_time;
487 }
488
489 if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
490 DCCP_WARN("RTT sample %u too large, using max\n", sample);
491 sample = DCCP_SANE_RTT_MAX;
492 }
493
494 h->rtt_sample_prev = 0; /* use current entry as next reference */
495keep_ref_for_next_time:
496
497 return sample;
498}
499EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 032bb61c6e39..c7eeda49cb20 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,10 +1,9 @@
1/* 1/*
2 * net/dccp/packet_history.h 2 * Packet RX/TX history data structures and routines for TFRC-based protocols.
3 * 3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
4 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. 5 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
5 * 6 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND 7 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/ 8 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz 9 * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
@@ -37,165 +36,128 @@
37#ifndef _DCCP_PKT_HIST_ 36#ifndef _DCCP_PKT_HIST_
38#define _DCCP_PKT_HIST_ 37#define _DCCP_PKT_HIST_
39 38
40#include <linux/ktime.h>
41#include <linux/list.h> 39#include <linux/list.h>
42#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h"
43 42
44#include "../../dccp.h" 43struct tfrc_tx_hist_entry;
45 44
46/* Number of later packets received before one is considered lost */ 45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
47#define TFRC_RECV_NUM_LATE_LOSS 3 46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
48 49
49#define TFRC_WIN_COUNT_PER_RTT 4 50/* Subtraction a-b modulo-16, respects circular wrap-around */
50#define TFRC_WIN_COUNT_LIMIT 16 51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
51 52
52/* 53/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
53 * Transmitter History data structures and declarations 54#define TFRC_NDUPACK 3
55
56/**
57 * tfrc_rx_hist_entry - Store information about a single received packet
58 * @tfrchrx_seqno: DCCP packet sequence number
59 * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1)
60 * @tfrchrx_ndp: the NDP count (if any) of the packet
61 * @tfrchrx_tstamp: actual receive time of packet
54 */ 62 */
55struct dccp_tx_hist_entry { 63struct tfrc_rx_hist_entry {
56 struct list_head dccphtx_node; 64 u64 tfrchrx_seqno:48,
57 u64 dccphtx_seqno:48, 65 tfrchrx_ccval:4,
58 dccphtx_sent:1; 66 tfrchrx_type:4;
59 u32 dccphtx_rtt; 67 u32 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */
60 ktime_t dccphtx_tstamp; 68 ktime_t tfrchrx_tstamp;
61}; 69};
62 70
63struct dccp_tx_hist { 71/**
64 struct kmem_cache *dccptxh_slab; 72 * tfrc_rx_hist - RX history structure for TFRC-based protocols
73 *
74 * @ring: Packet history for RTT sampling and loss detection
75 * @loss_count: Number of entries in circular history
76 * @loss_start: Movable index (for loss detection)
77 * @rtt_sample_prev: Used during RTT sampling, points to candidate entry
78 */
79struct tfrc_rx_hist {
80 struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
81 u8 loss_count:2,
82 loss_start:2;
83#define rtt_sample_prev loss_start
65}; 84};
66 85
67extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); 86/**
68extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); 87 * tfrc_rx_hist_index - index to reach n-th entry after loss_start
69 88 */
70static inline struct dccp_tx_hist_entry * 89static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
71 dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
72 const gfp_t prio)
73{ 90{
74 struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, 91 return (h->loss_start + n) & TFRC_NDUPACK;
75 prio);
76
77 if (entry != NULL)
78 entry->dccphtx_sent = 0;
79
80 return entry;
81} 92}
82 93
83static inline struct dccp_tx_hist_entry * 94/**
84 dccp_tx_hist_head(struct list_head *list) 95 * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
96 */
97static inline struct tfrc_rx_hist_entry *
98 tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
85{ 99{
86 struct dccp_tx_hist_entry *head = NULL; 100 return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
87
88 if (!list_empty(list))
89 head = list_entry(list->next, struct dccp_tx_hist_entry,
90 dccphtx_node);
91 return head;
92} 101}
93 102
94extern struct dccp_tx_hist_entry * 103/**
95 dccp_tx_hist_find_entry(const struct list_head *list, 104 * tfrc_rx_hist_entry - return the n-th history entry after loss_start
96 const u64 seq); 105 */
97 106static inline struct tfrc_rx_hist_entry *
98static inline void dccp_tx_hist_add_entry(struct list_head *list, 107 tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
99 struct dccp_tx_hist_entry *entry)
100{ 108{
101 list_add(&entry->dccphtx_node, list); 109 return h->ring[tfrc_rx_hist_index(h, n)];
102} 110}
103 111
104static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, 112/**
105 struct dccp_tx_hist_entry *entry) 113 * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
114 */
115static inline struct tfrc_rx_hist_entry *
116 tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
106{ 117{
107 if (entry != NULL) 118 return h->ring[h->loss_start];
108 kmem_cache_free(hist->dccptxh_slab, entry);
109} 119}
110 120
111extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, 121/* initialise loss detection and disable RTT sampling */
112 struct list_head *list); 122static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
113
114extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
115 struct list_head *list,
116 struct dccp_tx_hist_entry *next);
117
118/*
119 * Receiver History data structures and declarations
120 */
121struct dccp_rx_hist_entry {
122 struct list_head dccphrx_node;
123 u64 dccphrx_seqno:48,
124 dccphrx_ccval:4,
125 dccphrx_type:4;
126 u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
127 ktime_t dccphrx_tstamp;
128};
129
130struct dccp_rx_hist {
131 struct kmem_cache *dccprxh_slab;
132};
133
134extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
135extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
136
137static inline struct dccp_rx_hist_entry *
138 dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
139 const u32 ndp,
140 const struct sk_buff *skb,
141 const gfp_t prio)
142{ 123{
143 struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, 124 h->loss_count = 1;
144 prio);
145
146 if (entry != NULL) {
147 const struct dccp_hdr *dh = dccp_hdr(skb);
148
149 entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
150 entry->dccphrx_ccval = dh->dccph_ccval;
151 entry->dccphrx_type = dh->dccph_type;
152 entry->dccphrx_ndp = ndp;
153 entry->dccphrx_tstamp = ktime_get_real();
154 }
155
156 return entry;
157} 125}
158 126
159static inline struct dccp_rx_hist_entry * 127/* indicate whether previously a packet was detected missing */
160 dccp_rx_hist_head(struct list_head *list) 128static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
161{ 129{
162 struct dccp_rx_hist_entry *head = NULL; 130 return h->loss_count;
163
164 if (!list_empty(list))
165 head = list_entry(list->next, struct dccp_rx_hist_entry,
166 dccphrx_node);
167 return head;
168} 131}
169 132
170extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, 133/* any data packets missing between last reception and skb ? */
171 u8 *ccval); 134static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
172extern struct dccp_rx_hist_entry * 135 const struct sk_buff *skb,
173 dccp_rx_hist_find_data_packet(const struct list_head *list); 136 u32 ndp)
174
175extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
176 struct list_head *rx_list,
177 struct list_head *li_list,
178 struct dccp_rx_hist_entry *packet,
179 u64 nonloss_seqno);
180
181static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
182 struct dccp_rx_hist_entry *entry)
183{ 137{
184 if (entry != NULL) 138 int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
185 kmem_cache_free(hist->dccprxh_slab, entry); 139 DCCP_SKB_CB(skb)->dccpd_seq);
186}
187 140
188extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, 141 if (delta > 1 && ndp < delta)
189 struct list_head *list); 142 tfrc_rx_hist_loss_indicated(h);
190 143
191static inline int 144 return tfrc_rx_hist_loss_pending(h);
192 dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
193{
194 return entry->dccphrx_type == DCCP_PKT_DATA ||
195 entry->dccphrx_type == DCCP_PKT_DATAACK;
196} 145}
197 146
198extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, 147extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
199 struct list_head *li_list, u8 *win_loss); 148 const struct sk_buff *skb, const u32 ndp);
149
150extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
151
152struct tfrc_loss_hist;
153extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
154 struct tfrc_loss_hist *lh,
155 struct sk_buff *skb, u32 ndp,
156 u32 (*first_li)(struct sock *sk),
157 struct sock *sk);
158extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
159 const struct sk_buff *skb);
160extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
161extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
200 162
201#endif /* _DCCP_PKT_HIST_ */ 163#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
new file mode 100644
index 000000000000..d1dfbb8de64c
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -0,0 +1,63 @@
1/*
2 * TFRC: main module holding the pieces of the TFRC library together
3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
5 * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
6 */
7#include <linux/module.h>
8#include <linux/moduleparam.h>
9#include "tfrc.h"
10
11#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
12int tfrc_debug;
13module_param(tfrc_debug, bool, 0444);
14MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
15#endif
16
17extern int tfrc_tx_packet_history_init(void);
18extern void tfrc_tx_packet_history_exit(void);
19extern int tfrc_rx_packet_history_init(void);
20extern void tfrc_rx_packet_history_exit(void);
21
22extern int tfrc_li_init(void);
23extern void tfrc_li_exit(void);
24
25static int __init tfrc_module_init(void)
26{
27 int rc = tfrc_li_init();
28
29 if (rc)
30 goto out;
31
32 rc = tfrc_tx_packet_history_init();
33 if (rc)
34 goto out_free_loss_intervals;
35
36 rc = tfrc_rx_packet_history_init();
37 if (rc)
38 goto out_free_tx_history;
39 return 0;
40
41out_free_tx_history:
42 tfrc_tx_packet_history_exit();
43out_free_loss_intervals:
44 tfrc_li_exit();
45out:
46 return rc;
47}
48
49static void __exit tfrc_module_exit(void)
50{
51 tfrc_rx_packet_history_exit();
52 tfrc_tx_packet_history_exit();
53 tfrc_li_exit();
54}
55
56module_init(tfrc_module_init);
57module_exit(tfrc_module_exit);
58
59MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, "
60 "Ian McDonald <ian.mcdonald@jandi.co.nz>, "
61 "Arnaldo Carvalho de Melo <acme@redhat.com>");
62MODULE_DESCRIPTION("DCCP TFRC library");
63MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index faf5f7e219e3..1fb1187bbf1c 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -3,10 +3,11 @@
3/* 3/*
4 * net/dccp/ccids/lib/tfrc.h 4 * net/dccp/ccids/lib/tfrc.h
5 * 5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 6 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
7 * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> 7 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 8 * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
9 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon 9 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
10 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by 13 * it under the terms of the GNU General Public License as published by
@@ -15,6 +16,17 @@
15 */ 16 */
16#include <linux/types.h> 17#include <linux/types.h>
17#include <asm/div64.h> 18#include <asm/div64.h>
19#include "../../dccp.h"
20/* internal includes that this module exports: */
21#include "loss_interval.h"
22#include "packet_history.h"
23
24#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
25extern int tfrc_debug;
26#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
27#else
28#define tfrc_pr_debug(format, a...)
29#endif
18 30
19/* integer-arithmetic divisions of type (a * 1000000)/b */ 31/* integer-arithmetic divisions of type (a * 1000000)/b */
20static inline u64 scaled_div(u64 a, u32 b) 32static inline u64 scaled_div(u64 a, u32 b)
@@ -37,6 +49,15 @@ static inline u32 scaled_div32(u64 a, u32 b)
37 return result; 49 return result;
38} 50}
39 51
52/**
53 * tfrc_ewma - Exponentially weighted moving average
54 * @weight: Weight to be used as damping factor, in units of 1/10
55 */
56static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57{
58 return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
59}
60
40extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 61extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
41extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 62extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
42 63
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ee97950d77d1..ebe59d98721a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -72,11 +72,21 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
72/* RFC 1122, 4.2.3.1 initial RTO value */ 72/* RFC 1122, 4.2.3.1 initial RTO value */
73#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) 73#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
74 74
75#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ 75/*
76 * The maximum back-off value for retransmissions. This is needed for
77 * - retransmitting client-Requests (sec. 8.1.1),
78 * - retransmitting Close/CloseReq when closing (sec. 8.3),
79 * - feature-negotiation retransmission (sec. 6.6.3),
80 * - Acks in client-PARTOPEN state (sec. 8.1.5).
81 */
82#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
76 83
77/* bounds for sampled RTT values from packet exchanges (in usec) */ 84/*
85 * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
86 */
78#define DCCP_SANE_RTT_MIN 100 87#define DCCP_SANE_RTT_MIN 100
79#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC) 88#define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5)
89#define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC)
80 90
81/* Maximal interval between probes for local resources. */ 91/* Maximal interval between probes for local resources. */
82#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) 92#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
@@ -143,12 +153,6 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
143 return after48(seq1, seq2) ? seq1 : seq2; 153 return after48(seq1, seq2) ? seq1 : seq2;
144} 154}
145 155
146/* is seq1 next seqno after seq2 */
147static inline int follows48(const u64 seq1, const u64 seq2)
148{
149 return dccp_delta_seqno(seq2, seq1) == 1;
150}
151
152enum { 156enum {
153 DCCP_MIB_NUM = 0, 157 DCCP_MIB_NUM = 0,
154 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ 158 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
@@ -334,6 +338,7 @@ struct dccp_skb_cb {
334 338
335#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) 339#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
336 340
341/* RFC 4340, sec. 7.7 */
337static inline int dccp_non_data_packet(const struct sk_buff *skb) 342static inline int dccp_non_data_packet(const struct sk_buff *skb)
338{ 343{
339 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; 344 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
@@ -346,6 +351,17 @@ static inline int dccp_non_data_packet(const struct sk_buff *skb)
346 type == DCCP_PKT_SYNCACK; 351 type == DCCP_PKT_SYNCACK;
347} 352}
348 353
354/* RFC 4340, sec. 7.7 */
355static inline int dccp_data_packet(const struct sk_buff *skb)
356{
357 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
358
359 return type == DCCP_PKT_DATA ||
360 type == DCCP_PKT_DATAACK ||
361 type == DCCP_PKT_REQUEST ||
362 type == DCCP_PKT_RESPONSE;
363}
364
349static inline int dccp_packet_without_ack(const struct sk_buff *skb) 365static inline int dccp_packet_without_ack(const struct sk_buff *skb)
350{ 366{
351 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; 367 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
@@ -406,6 +422,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
406} 422}
407 423
408extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); 424extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
425extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
409extern int dccp_insert_option_elapsed_time(struct sock *sk, 426extern int dccp_insert_option_elapsed_time(struct sock *sk,
410 struct sk_buff *skb, 427 struct sk_buff *skb,
411 u32 elapsed_time); 428 u32 elapsed_time);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 5ebdd86c1b99..4a4f6ce4498d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -4,10 +4,16 @@
4 * An implementation of the DCCP protocol 4 * An implementation of the DCCP protocol
5 * Andrea Bittau <a.bittau@cs.ucl.ac.uk> 5 * Andrea Bittau <a.bittau@cs.ucl.ac.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * ASSUMPTIONS
8 * modify it under the terms of the GNU General Public License 8 * -----------
9 * as published by the Free Software Foundation; either version 9 * o All currently known SP features have 1-byte quantities. If in the future
10 * 2 of the License, or (at your option) any later version. 10 * extensions of RFCs 4340..42 define features with item lengths larger than
11 * one byte, a feature-specific extension of the code will be required.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
11 */ 17 */
12 18
13#include <linux/module.h> 19#include <linux/module.h>
@@ -24,11 +30,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
24 30
25 dccp_feat_debug(type, feature, *val); 31 dccp_feat_debug(type, feature, *val);
26 32
27 if (!dccp_feat_is_valid_type(type)) { 33 if (len > 3) {
28 DCCP_WARN("option type %d invalid in negotiation\n", type);
29 return 1;
30 }
31 if (!dccp_feat_is_valid_length(type, feature, len)) {
32 DCCP_WARN("invalid length %d\n", len); 34 DCCP_WARN("invalid length %d\n", len);
33 return 1; 35 return 1;
34 } 36 }
@@ -99,7 +101,6 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
99 return 0; 101 return 0;
100} 102}
101 103
102/* XXX taking only u8 vals */
103static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) 104static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
104{ 105{
105 dccp_feat_debug(type, feat, val); 106 dccp_feat_debug(type, feat, val);
@@ -144,7 +145,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
144 /* FIXME sanity check vals */ 145 /* FIXME sanity check vals */
145 146
146 /* Are values in any order? XXX Lame "algorithm" here */ 147 /* Are values in any order? XXX Lame "algorithm" here */
147 /* XXX assume values are 1 byte */
148 for (i = 0; i < slen; i++) { 148 for (i = 0; i < slen; i++) {
149 for (j = 0; j < rlen; j++) { 149 for (j = 0; j < rlen; j++) {
150 if (spref[i] == rpref[j]) { 150 if (spref[i] == rpref[j]) {
@@ -179,7 +179,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
179 } 179 }
180 180
181 /* need to put result and our preference list */ 181 /* need to put result and our preference list */
182 /* XXX assume 1 byte vals */
183 rlen = 1 + opt->dccpop_len; 182 rlen = 1 + opt->dccpop_len;
184 rpref = kmalloc(rlen, GFP_ATOMIC); 183 rpref = kmalloc(rlen, GFP_ATOMIC);
185 if (rpref == NULL) 184 if (rpref == NULL)
@@ -637,12 +636,12 @@ const char *dccp_feat_name(const u8 feat)
637 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", 636 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
638 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", 637 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
639 }; 638 };
639 if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
640 return feature_names[DCCPF_RESERVED];
641
640 if (feat >= DCCPF_MIN_CCID_SPECIFIC) 642 if (feat >= DCCPF_MIN_CCID_SPECIFIC)
641 return "CCID-specific"; 643 return "CCID-specific";
642 644
643 if (dccp_feat_is_reserved(feat))
644 return feature_names[DCCPF_RESERVED];
645
646 return feature_names[feat]; 645 return feature_names[feat];
647} 646}
648 647
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 177f7dee4d10..e272222c7ace 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -14,32 +14,6 @@
14#include <linux/types.h> 14#include <linux/types.h>
15#include "dccp.h" 15#include "dccp.h"
16 16
17static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
18{
19 /* sec. 6.1: Confirm has at least length 3,
20 * sec. 6.2: Change has at least length 4 */
21 if (len < 3)
22 return 1;
23 if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R))
24 return 1;
25 /* XXX: add per-feature length validation (sec. 6.6.8) */
26 return 0;
27}
28
29static inline int dccp_feat_is_reserved(const u8 feat)
30{
31 return (feat > DCCPF_DATA_CHECKSUM &&
32 feat < DCCPF_MIN_CCID_SPECIFIC) ||
33 feat == DCCPF_RESERVED;
34}
35
36/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */
37static inline int dccp_feat_is_valid_type(const u8 optnum)
38{
39 return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R;
40
41}
42
43#ifdef CONFIG_IP_DCCP_DEBUG 17#ifdef CONFIG_IP_DCCP_DEBUG
44extern const char *dccp_feat_typename(const u8 type); 18extern const char *dccp_feat_typename(const u8 type);
45extern const char *dccp_feat_name(const u8 feat); 19extern const char *dccp_feat_name(const u8 feat);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 1ce101062824..08392ed86c25 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -22,26 +22,77 @@
22/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ 22/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
23int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; 23int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
24 24
25static void dccp_fin(struct sock *sk, struct sk_buff *skb) 25static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
26{ 26{
27 sk->sk_shutdown |= RCV_SHUTDOWN;
28 sock_set_flag(sk, SOCK_DONE);
29 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); 27 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
30 __skb_queue_tail(&sk->sk_receive_queue, skb); 28 __skb_queue_tail(&sk->sk_receive_queue, skb);
31 skb_set_owner_r(skb, sk); 29 skb_set_owner_r(skb, sk);
32 sk->sk_data_ready(sk, 0); 30 sk->sk_data_ready(sk, 0);
33} 31}
34 32
35static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) 33static void dccp_fin(struct sock *sk, struct sk_buff *skb)
36{ 34{
37 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); 35 /*
38 dccp_fin(sk, skb); 36 * On receiving Close/CloseReq, both RD/WR shutdown are performed.
39 dccp_set_state(sk, DCCP_CLOSED); 37 * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after
40 sk_wake_async(sk, 1, POLL_HUP); 38 * receiving the closing segment, but there is no guarantee that such
39 * data will be processed at all.
40 */
41 sk->sk_shutdown = SHUTDOWN_MASK;
42 sock_set_flag(sk, SOCK_DONE);
43 dccp_enqueue_skb(sk, skb);
44}
45
46static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
47{
48 int queued = 0;
49
50 switch (sk->sk_state) {
51 /*
52 * We ignore Close when received in one of the following states:
53 * - CLOSED (may be a late or duplicate packet)
54 * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier)
55 * - RESPOND (already handled by dccp_check_req)
56 */
57 case DCCP_CLOSING:
58 /*
59 * Simultaneous-close: receiving a Close after sending one. This
60 * can happen if both client and server perform active-close and
61 * will result in an endless ping-pong of crossing and retrans-
62 * mitted Close packets, which only terminates when one of the
63 * nodes times out (min. 64 seconds). Quicker convergence can be
64 * achieved when one of the nodes acts as tie-breaker.
65 * This is ok as both ends are done with data transfer and each
66 * end is just waiting for the other to acknowledge termination.
67 */
68 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT)
69 break;
70 /* fall through */
71 case DCCP_REQUESTING:
72 case DCCP_ACTIVE_CLOSEREQ:
73 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
74 dccp_done(sk);
75 break;
76 case DCCP_OPEN:
77 case DCCP_PARTOPEN:
78 /* Give waiting application a chance to read pending data */
79 queued = 1;
80 dccp_fin(sk, skb);
81 dccp_set_state(sk, DCCP_PASSIVE_CLOSE);
82 /* fall through */
83 case DCCP_PASSIVE_CLOSE:
84 /*
85 * Retransmitted Close: we have already enqueued the first one.
86 */
87 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
88 }
89 return queued;
41} 90}
42 91
43static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) 92static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
44{ 93{
94 int queued = 0;
95
45 /* 96 /*
46 * Step 7: Check for unexpected packet types 97 * Step 7: Check for unexpected packet types
47 * If (S.is_server and P.type == CloseReq) 98 * If (S.is_server and P.type == CloseReq)
@@ -50,12 +101,26 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
50 */ 101 */
51 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { 102 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
52 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); 103 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
53 return; 104 return queued;
54 } 105 }
55 106
56 if (sk->sk_state != DCCP_CLOSING) 107 /* Step 13: process relevant Client states < CLOSEREQ */
108 switch (sk->sk_state) {
109 case DCCP_REQUESTING:
110 dccp_send_close(sk, 0);
57 dccp_set_state(sk, DCCP_CLOSING); 111 dccp_set_state(sk, DCCP_CLOSING);
58 dccp_send_close(sk, 0); 112 break;
113 case DCCP_OPEN:
114 case DCCP_PARTOPEN:
115 /* Give waiting application a chance to read pending data */
116 queued = 1;
117 dccp_fin(sk, skb);
118 dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ);
119 /* fall through */
120 case DCCP_PASSIVE_CLOSEREQ:
121 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
122 }
123 return queued;
59} 124}
60 125
61static u8 dccp_reset_code_convert(const u8 code) 126static u8 dccp_reset_code_convert(const u8 code)
@@ -90,7 +155,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
90 dccp_fin(sk, skb); 155 dccp_fin(sk, skb);
91 156
92 if (err && !sock_flag(sk, SOCK_DEAD)) 157 if (err && !sock_flag(sk, SOCK_DEAD))
93 sk_wake_async(sk, 0, POLL_ERR); 158 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
94 dccp_time_wait(sk, DCCP_TIME_WAIT, 0); 159 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
95} 160}
96 161
@@ -103,6 +168,21 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
103 DCCP_SKB_CB(skb)->dccpd_ack_seq); 168 DCCP_SKB_CB(skb)->dccpd_ack_seq);
104} 169}
105 170
171static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
172{
173 const struct dccp_sock *dp = dccp_sk(sk);
174
175 /* Don't deliver to RX CCID when node has shut down read end. */
176 if (!(sk->sk_shutdown & RCV_SHUTDOWN))
177 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
178 /*
179 * Until the TX queue has been drained, we can not honour SHUT_WR, since
180 * we need received feedback as input to adjust congestion control.
181 */
182 if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN))
183 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
184}
185
106static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) 186static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
107{ 187{
108 const struct dccp_hdr *dh = dccp_hdr(skb); 188 const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -209,13 +289,11 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
209 case DCCP_PKT_DATAACK: 289 case DCCP_PKT_DATAACK:
210 case DCCP_PKT_DATA: 290 case DCCP_PKT_DATA:
211 /* 291 /*
212 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED 292 * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when
213 * option if it is. 293 * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening"
294 * - sk_receive_queue is full, use Code 2, "Receive Buffer"
214 */ 295 */
215 __skb_pull(skb, dh->dccph_doff * 4); 296 dccp_enqueue_skb(sk, skb);
216 __skb_queue_tail(&sk->sk_receive_queue, skb);
217 skb_set_owner_r(skb, sk);
218 sk->sk_data_ready(sk, 0);
219 return 0; 297 return 0;
220 case DCCP_PKT_ACK: 298 case DCCP_PKT_ACK:
221 goto discard; 299 goto discard;
@@ -231,11 +309,13 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
231 dccp_rcv_reset(sk, skb); 309 dccp_rcv_reset(sk, skb);
232 return 0; 310 return 0;
233 case DCCP_PKT_CLOSEREQ: 311 case DCCP_PKT_CLOSEREQ:
234 dccp_rcv_closereq(sk, skb); 312 if (dccp_rcv_closereq(sk, skb))
313 return 0;
235 goto discard; 314 goto discard;
236 case DCCP_PKT_CLOSE: 315 case DCCP_PKT_CLOSE:
237 dccp_rcv_close(sk, skb); 316 if (dccp_rcv_close(sk, skb))
238 return 0; 317 return 0;
318 goto discard;
239 case DCCP_PKT_REQUEST: 319 case DCCP_PKT_REQUEST:
240 /* Step 7 320 /* Step 7
241 * or (S.is_server and P.type == Response) 321 * or (S.is_server and P.type == Response)
@@ -289,7 +369,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
289 if (dccp_check_seqno(sk, skb)) 369 if (dccp_check_seqno(sk, skb))
290 goto discard; 370 goto discard;
291 371
292 if (dccp_parse_options(sk, skb)) 372 if (dccp_parse_options(sk, NULL, skb))
293 goto discard; 373 goto discard;
294 374
295 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 375 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -300,9 +380,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
300 DCCP_SKB_CB(skb)->dccpd_seq, 380 DCCP_SKB_CB(skb)->dccpd_seq,
301 DCCP_ACKVEC_STATE_RECEIVED)) 381 DCCP_ACKVEC_STATE_RECEIVED))
302 goto discard; 382 goto discard;
303 383 dccp_deliver_input_to_ccids(sk, skb);
304 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
305 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
306 384
307 return __dccp_rcv_established(sk, skb, dh, len); 385 return __dccp_rcv_established(sk, skb, dh, len);
308discard: 386discard:
@@ -349,7 +427,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
349 goto out_invalid_packet; 427 goto out_invalid_packet;
350 } 428 }
351 429
352 if (dccp_parse_options(sk, skb)) 430 if (dccp_parse_options(sk, NULL, skb))
353 goto out_invalid_packet; 431 goto out_invalid_packet;
354 432
355 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ 433 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
@@ -402,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
402 480
403 if (!sock_flag(sk, SOCK_DEAD)) { 481 if (!sock_flag(sk, SOCK_DEAD)) {
404 sk->sk_state_change(sk); 482 sk->sk_state_change(sk);
405 sk_wake_async(sk, 0, POLL_OUT); 483 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
406 } 484 }
407 485
408 if (sk->sk_write_pending || icsk->icsk_ack.pingpong || 486 if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
@@ -531,7 +609,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
531 /* 609 /*
532 * Step 8: Process options and mark acknowledgeable 610 * Step 8: Process options and mark acknowledgeable
533 */ 611 */
534 if (dccp_parse_options(sk, skb)) 612 if (dccp_parse_options(sk, NULL, skb))
535 goto discard; 613 goto discard;
536 614
537 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 615 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -543,8 +621,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
543 DCCP_ACKVEC_STATE_RECEIVED)) 621 DCCP_ACKVEC_STATE_RECEIVED))
544 goto discard; 622 goto discard;
545 623
546 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); 624 dccp_deliver_input_to_ccids(sk, skb);
547 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
548 } 625 }
549 626
550 /* 627 /*
@@ -560,16 +637,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
560 return 0; 637 return 0;
561 /* 638 /*
562 * Step 7: Check for unexpected packet types 639 * Step 7: Check for unexpected packet types
563 * If (S.is_server and P.type == CloseReq) 640 * If (S.is_server and P.type == Response)
564 * or (S.is_server and P.type == Response)
565 * or (S.is_client and P.type == Request) 641 * or (S.is_client and P.type == Request)
566 * or (S.state == RESPOND and P.type == Data), 642 * or (S.state == RESPOND and P.type == Data),
567 * Send Sync packet acknowledging P.seqno 643 * Send Sync packet acknowledging P.seqno
568 * Drop packet and return 644 * Drop packet and return
569 */ 645 */
570 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && 646 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
571 (dh->dccph_type == DCCP_PKT_RESPONSE || 647 dh->dccph_type == DCCP_PKT_RESPONSE) ||
572 dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
573 (dp->dccps_role == DCCP_ROLE_CLIENT && 648 (dp->dccps_role == DCCP_ROLE_CLIENT &&
574 dh->dccph_type == DCCP_PKT_REQUEST) || 649 dh->dccph_type == DCCP_PKT_REQUEST) ||
575 (sk->sk_state == DCCP_RESPOND && 650 (sk->sk_state == DCCP_RESPOND &&
@@ -577,11 +652,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
577 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); 652 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
578 goto discard; 653 goto discard;
579 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { 654 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
580 dccp_rcv_closereq(sk, skb); 655 if (dccp_rcv_closereq(sk, skb))
656 return 0;
581 goto discard; 657 goto discard;
582 } else if (dh->dccph_type == DCCP_PKT_CLOSE) { 658 } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
583 dccp_rcv_close(sk, skb); 659 if (dccp_rcv_close(sk, skb))
584 return 0; 660 return 0;
661 goto discard;
585 } 662 }
586 663
587 switch (sk->sk_state) { 664 switch (sk->sk_state) {
@@ -611,7 +688,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
611 switch (old_state) { 688 switch (old_state) {
612 case DCCP_PARTOPEN: 689 case DCCP_PARTOPEN:
613 sk->sk_state_change(sk); 690 sk->sk_state_change(sk);
614 sk_wake_async(sk, 0, POLL_OUT); 691 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
615 break; 692 break;
616 } 693 }
617 } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { 694 } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 01a6a808bdb7..c982ad88223d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -218,7 +218,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
218 return; 218 return;
219 } 219 }
220 220
221 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, 221 sk = inet_lookup(&init_net, &dccp_hashinfo, iph->daddr, dh->dccph_dport,
222 iph->saddr, dh->dccph_sport, inet_iif(skb)); 222 iph->saddr, dh->dccph_sport, inet_iif(skb));
223 if (sk == NULL) { 223 if (sk == NULL) {
224 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 224 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
408 408
409 dccp_sync_mss(newsk, dst_mtu(dst)); 409 dccp_sync_mss(newsk, dst_mtu(dst));
410 410
411 __inet_hash(&dccp_hashinfo, newsk, 0); 411 __inet_hash_nolisten(&dccp_hashinfo, newsk);
412 __inet_inherit_port(&dccp_hashinfo, sk, newsk); 412 __inet_inherit_port(&dccp_hashinfo, sk, newsk);
413 413
414 return newsk; 414 return newsk;
@@ -436,7 +436,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
436 if (req != NULL) 436 if (req != NULL)
437 return dccp_check_req(sk, skb, req, prev); 437 return dccp_check_req(sk, skb, req, prev);
438 438
439 nsk = inet_lookup_established(&dccp_hashinfo, 439 nsk = inet_lookup_established(&init_net, &dccp_hashinfo,
440 iph->saddr, dh->dccph_sport, 440 iph->saddr, dh->dccph_sport,
441 iph->daddr, dh->dccph_dport, 441 iph->daddr, dh->dccph_dport,
442 inet_iif(skb)); 442 inet_iif(skb));
@@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
469 }; 469 };
470 470
471 security_skb_classify_flow(skb, &fl); 471 security_skb_classify_flow(skb, &fl);
472 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 472 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
473 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 473 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
474 return NULL; 474 return NULL;
475 } 475 }
@@ -600,11 +600,12 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
600 if (req == NULL) 600 if (req == NULL)
601 goto drop; 601 goto drop;
602 602
603 if (dccp_parse_options(sk, skb))
604 goto drop_and_free;
605
606 dccp_reqsk_init(req, skb); 603 dccp_reqsk_init(req, skb);
607 604
605 dreq = dccp_rsk(req);
606 if (dccp_parse_options(sk, dreq, skb))
607 goto drop_and_free;
608
608 if (security_inet_conn_request(sk, skb, req)) 609 if (security_inet_conn_request(sk, skb, req))
609 goto drop_and_free; 610 goto drop_and_free;
610 611
@@ -621,7 +622,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
621 * In fact we defer setting S.GSR, S.SWL, S.SWH to 622 * In fact we defer setting S.GSR, S.SWL, S.SWH to
622 * dccp_create_openreq_child. 623 * dccp_create_openreq_child.
623 */ 624 */
624 dreq = dccp_rsk(req);
625 dreq->dreq_isr = dcb->dccpd_seq; 625 dreq->dreq_isr = dcb->dccpd_seq;
626 dreq->dreq_iss = dccp_v4_init_sequence(skb); 626 dreq->dreq_iss = dccp_v4_init_sequence(skb);
627 dreq->dreq_service = service; 627 dreq->dreq_service = service;
@@ -817,7 +817,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
817 817
818 /* Step 2: 818 /* Step 2:
819 * Look up flow ID in table and get corresponding socket */ 819 * Look up flow ID in table and get corresponding socket */
820 sk = __inet_lookup(&dccp_hashinfo, 820 sk = __inet_lookup(&init_net, &dccp_hashinfo,
821 iph->saddr, dh->dccph_sport, 821 iph->saddr, dh->dccph_sport,
822 iph->daddr, dh->dccph_dport, inet_iif(skb)); 822 iph->daddr, dh->dccph_dport, inet_iif(skb));
823 /* 823 /*
@@ -922,6 +922,8 @@ static struct timewait_sock_ops dccp_timewait_sock_ops = {
922 .twsk_obj_size = sizeof(struct inet_timewait_sock), 922 .twsk_obj_size = sizeof(struct inet_timewait_sock),
923}; 923};
924 924
925DEFINE_PROTO_INUSE(dccp_v4)
926
925static struct proto dccp_v4_prot = { 927static struct proto dccp_v4_prot = {
926 .name = "DCCP", 928 .name = "DCCP",
927 .owner = THIS_MODULE, 929 .owner = THIS_MODULE,
@@ -950,6 +952,7 @@ static struct proto dccp_v4_prot = {
950 .compat_setsockopt = compat_dccp_setsockopt, 952 .compat_setsockopt = compat_dccp_setsockopt,
951 .compat_getsockopt = compat_dccp_getsockopt, 953 .compat_getsockopt = compat_dccp_getsockopt,
952#endif 954#endif
955 REF_PROTO_INUSE(dccp_v4)
953}; 956};
954 957
955static struct net_protocol dccp_v4_protocol = { 958static struct net_protocol dccp_v4_protocol = {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 62428ff137dd..ed0a0053a797 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -101,8 +101,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
101 int err; 101 int err;
102 __u64 seq; 102 __u64 seq;
103 103
104 sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, 104 sk = inet6_lookup(&init_net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
105 &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); 105 &hdr->saddr, dh->dccph_sport, inet6_iif(skb));
106 106
107 if (sk == NULL) { 107 if (sk == NULL) {
108 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); 108 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -366,7 +366,7 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
366 if (req != NULL) 366 if (req != NULL)
367 return dccp_check_req(sk, skb, req, prev); 367 return dccp_check_req(sk, skb, req, prev);
368 368
369 nsk = __inet6_lookup_established(&dccp_hashinfo, 369 nsk = __inet6_lookup_established(&init_net, &dccp_hashinfo,
370 &iph->saddr, dh->dccph_sport, 370 &iph->saddr, dh->dccph_sport,
371 &iph->daddr, ntohs(dh->dccph_dport), 371 &iph->daddr, ntohs(dh->dccph_dport),
372 inet6_iif(skb)); 372 inet6_iif(skb));
@@ -415,11 +415,12 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
415 if (req == NULL) 415 if (req == NULL)
416 goto drop; 416 goto drop;
417 417
418 if (dccp_parse_options(sk, skb))
419 goto drop_and_free;
420
421 dccp_reqsk_init(req, skb); 418 dccp_reqsk_init(req, skb);
422 419
420 dreq = dccp_rsk(req);
421 if (dccp_parse_options(sk, dreq, skb))
422 goto drop_and_free;
423
423 if (security_inet_conn_request(sk, skb, req)) 424 if (security_inet_conn_request(sk, skb, req))
424 goto drop_and_free; 425 goto drop_and_free;
425 426
@@ -449,7 +450,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
449 * In fact we defer setting S.GSR, S.SWL, S.SWH to 450 * In fact we defer setting S.GSR, S.SWL, S.SWH to
450 * dccp_create_openreq_child. 451 * dccp_create_openreq_child.
451 */ 452 */
452 dreq = dccp_rsk(req);
453 dreq->dreq_isr = dcb->dccpd_seq; 453 dreq->dreq_isr = dcb->dccpd_seq;
454 dreq->dreq_iss = dccp_v6_init_sequence(skb); 454 dreq->dreq_iss = dccp_v6_init_sequence(skb);
455 dreq->dreq_service = service; 455 dreq->dreq_service = service;
@@ -797,7 +797,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
797 797
798 /* Step 2: 798 /* Step 2:
799 * Look up flow ID in table and get corresponding socket */ 799 * Look up flow ID in table and get corresponding socket */
800 sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, 800 sk = __inet6_lookup(&init_net, &dccp_hashinfo, &ipv6_hdr(skb)->saddr,
801 dh->dccph_sport, 801 dh->dccph_sport,
802 &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), 802 &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
803 inet6_iif(skb)); 803 inet6_iif(skb));
@@ -994,7 +994,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
994 if (final_p) 994 if (final_p)
995 ipv6_addr_copy(&fl.fl6_dst, final_p); 995 ipv6_addr_copy(&fl.fl6_dst, final_p);
996 996
997 err = __xfrm_lookup(&dst, &fl, sk, 1); 997 err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
998 if (err < 0) { 998 if (err < 0) {
999 if (err == -EREMOTE) 999 if (err == -EREMOTE)
1000 err = ip6_dst_blackhole(sk, &dst, &fl); 1000 err = ip6_dst_blackhole(sk, &dst, &fl);
@@ -1107,6 +1107,8 @@ static struct timewait_sock_ops dccp6_timewait_sock_ops = {
1107 .twsk_obj_size = sizeof(struct dccp6_timewait_sock), 1107 .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
1108}; 1108};
1109 1109
1110DEFINE_PROTO_INUSE(dccp_v6)
1111
1110static struct proto dccp_v6_prot = { 1112static struct proto dccp_v6_prot = {
1111 .name = "DCCPv6", 1113 .name = "DCCPv6",
1112 .owner = THIS_MODULE, 1114 .owner = THIS_MODULE,
@@ -1135,6 +1137,7 @@ static struct proto dccp_v6_prot = {
1135 .compat_setsockopt = compat_dccp_setsockopt, 1137 .compat_setsockopt = compat_dccp_setsockopt,
1136 .compat_getsockopt = compat_dccp_getsockopt, 1138 .compat_getsockopt = compat_dccp_getsockopt,
1137#endif 1139#endif
1140 REF_PROTO_INUSE(dccp_v6)
1138}; 1141};
1139 1142
1140static struct inet6_protocol dccp_v6_protocol = { 1143static struct inet6_protocol dccp_v6_protocol = {
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 831b76e08d02..027d1814e1ab 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -117,11 +117,13 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
117 struct dccp_sock *newdp = dccp_sk(newsk); 117 struct dccp_sock *newdp = dccp_sk(newsk);
118 struct dccp_minisock *newdmsk = dccp_msk(newsk); 118 struct dccp_minisock *newdmsk = dccp_msk(newsk);
119 119
120 newdp->dccps_role = DCCP_ROLE_SERVER; 120 newdp->dccps_role = DCCP_ROLE_SERVER;
121 newdp->dccps_hc_rx_ackvec = NULL; 121 newdp->dccps_hc_rx_ackvec = NULL;
122 newdp->dccps_service_list = NULL; 122 newdp->dccps_service_list = NULL;
123 newdp->dccps_service = dreq->dreq_service; 123 newdp->dccps_service = dreq->dreq_service;
124 newicsk->icsk_rto = DCCP_TIMEOUT_INIT; 124 newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
125 newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
126 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
125 127
126 if (dccp_feat_clone(sk, newsk)) 128 if (dccp_feat_clone(sk, newsk))
127 goto out_free; 129 goto out_free;
@@ -200,10 +202,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
200 struct request_sock **prev) 202 struct request_sock **prev)
201{ 203{
202 struct sock *child = NULL; 204 struct sock *child = NULL;
205 struct dccp_request_sock *dreq = dccp_rsk(req);
203 206
204 /* Check for retransmitted REQUEST */ 207 /* Check for retransmitted REQUEST */
205 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { 208 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
206 struct dccp_request_sock *dreq = dccp_rsk(req);
207 209
208 if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { 210 if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
209 dccp_pr_debug("Retransmitted REQUEST\n"); 211 dccp_pr_debug("Retransmitted REQUEST\n");
@@ -227,22 +229,22 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
227 goto drop; 229 goto drop;
228 230
229 /* Invalid ACK */ 231 /* Invalid ACK */
230 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { 232 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) {
231 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " 233 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
232 "dreq_iss=%llu\n", 234 "dreq_iss=%llu\n",
233 (unsigned long long) 235 (unsigned long long)
234 DCCP_SKB_CB(skb)->dccpd_ack_seq, 236 DCCP_SKB_CB(skb)->dccpd_ack_seq,
235 (unsigned long long) 237 (unsigned long long) dreq->dreq_iss);
236 dccp_rsk(req)->dreq_iss);
237 goto drop; 238 goto drop;
238 } 239 }
239 240
241 if (dccp_parse_options(sk, dreq, skb))
242 goto drop;
243
240 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 244 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
241 if (child == NULL) 245 if (child == NULL)
242 goto listen_overflow; 246 goto listen_overflow;
243 247
244 /* FIXME: deal with options */
245
246 inet_csk_reqsk_queue_unlink(sk, req, prev); 248 inet_csk_reqsk_queue_unlink(sk, req, prev);
247 inet_csk_reqsk_queue_removed(sk, req); 249 inet_csk_reqsk_queue_removed(sk, req);
248 inet_csk_reqsk_queue_add(sk, req, child); 250 inet_csk_reqsk_queue_add(sk, req, child);
@@ -303,9 +305,12 @@ EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
303 305
304void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) 306void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
305{ 307{
306 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; 308 struct dccp_request_sock *dreq = dccp_rsk(req);
307 inet_rsk(req)->acked = 0; 309
308 req->rcv_wnd = sysctl_dccp_feat_sequence_window; 310 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
311 inet_rsk(req)->acked = 0;
312 req->rcv_wnd = sysctl_dccp_feat_sequence_window;
313 dreq->dreq_timestamp_echo = 0;
309} 314}
310 315
311EXPORT_SYMBOL_GPL(dccp_reqsk_init); 316EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d286cffe2c49..d2a84a2fecee 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -46,7 +46,13 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
46 return value; 46 return value;
47} 47}
48 48
49int dccp_parse_options(struct sock *sk, struct sk_buff *skb) 49/**
50 * dccp_parse_options - Parse DCCP options present in @skb
51 * @sk: client|server|listening dccp socket (when @dreq != NULL)
52 * @dreq: request socket to use during connection setup, or NULL
53 */
54int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
55 struct sk_buff *skb)
50{ 56{
51 struct dccp_sock *dp = dccp_sk(sk); 57 struct dccp_sock *dp = dccp_sk(sk);
52 const struct dccp_hdr *dh = dccp_hdr(skb); 58 const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -92,6 +98,20 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
92 goto out_invalid_option; 98 goto out_invalid_option;
93 } 99 }
94 100
101 /*
102 * CCID-Specific Options (from RFC 4340, sec. 10.3):
103 *
104 * Option numbers 128 through 191 are for options sent from the
105 * HC-Sender to the HC-Receiver; option numbers 192 through 255
106 * are for options sent from the HC-Receiver to the HC-Sender.
107 *
108 * CCID-specific options are ignored during connection setup, as
109 * negotiation may still be in progress (see RFC 4340, 10.3).
110 *
111 */
112 if (dreq != NULL && opt >= 128)
113 goto ignore_option;
114
95 switch (opt) { 115 switch (opt) {
96 case DCCPO_PADDING: 116 case DCCPO_PADDING:
97 break; 117 break;
@@ -112,6 +132,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
112 case DCCPO_CHANGE_L: 132 case DCCPO_CHANGE_L:
113 /* fall through */ 133 /* fall through */
114 case DCCPO_CHANGE_R: 134 case DCCPO_CHANGE_R:
135 if (pkt_type == DCCP_PKT_DATA)
136 break;
115 if (len < 2) 137 if (len < 2)
116 goto out_invalid_option; 138 goto out_invalid_option;
117 rc = dccp_feat_change_recv(sk, opt, *value, value + 1, 139 rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
@@ -128,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
128 case DCCPO_CONFIRM_L: 150 case DCCPO_CONFIRM_L:
129 /* fall through */ 151 /* fall through */
130 case DCCPO_CONFIRM_R: 152 case DCCPO_CONFIRM_R:
131 if (len < 2) 153 if (pkt_type == DCCP_PKT_DATA)
154 break;
155 if (len < 2) /* FIXME this disallows empty confirm */
132 goto out_invalid_option; 156 goto out_invalid_option;
133 if (dccp_feat_confirm_recv(sk, opt, *value, 157 if (dccp_feat_confirm_recv(sk, opt, *value,
134 value + 1, len - 1)) 158 value + 1, len - 1))
@@ -136,7 +160,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
136 break; 160 break;
137 case DCCPO_ACK_VECTOR_0: 161 case DCCPO_ACK_VECTOR_0:
138 case DCCPO_ACK_VECTOR_1: 162 case DCCPO_ACK_VECTOR_1:
139 if (pkt_type == DCCP_PKT_DATA) 163 if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
140 break; 164 break;
141 165
142 if (dccp_msk(sk)->dccpms_send_ack_vector && 166 if (dccp_msk(sk)->dccpms_send_ack_vector &&
@@ -146,15 +170,27 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
146 case DCCPO_TIMESTAMP: 170 case DCCPO_TIMESTAMP:
147 if (len != 4) 171 if (len != 4)
148 goto out_invalid_option; 172 goto out_invalid_option;
149 173 /*
174 * RFC 4340 13.1: "The precise time corresponding to
175 * Timestamp Value zero is not specified". We use
176 * zero to indicate absence of a meaningful timestamp.
177 */
150 opt_val = get_unaligned((__be32 *)value); 178 opt_val = get_unaligned((__be32 *)value);
151 opt_recv->dccpor_timestamp = ntohl(opt_val); 179 if (unlikely(opt_val == 0)) {
152 180 DCCP_WARN("Timestamp with zero value\n");
153 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; 181 break;
154 dp->dccps_timestamp_time = ktime_get_real(); 182 }
155 183
184 if (dreq != NULL) {
185 dreq->dreq_timestamp_echo = ntohl(opt_val);
186 dreq->dreq_timestamp_time = dccp_timestamp();
187 } else {
188 opt_recv->dccpor_timestamp =
189 dp->dccps_timestamp_echo = ntohl(opt_val);
190 dp->dccps_timestamp_time = dccp_timestamp();
191 }
156 dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", 192 dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
157 dccp_role(sk), opt_recv->dccpor_timestamp, 193 dccp_role(sk), ntohl(opt_val),
158 (unsigned long long) 194 (unsigned long long)
159 DCCP_SKB_CB(skb)->dccpd_ack_seq); 195 DCCP_SKB_CB(skb)->dccpd_ack_seq);
160 break; 196 break;
@@ -194,18 +230,17 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
194 opt_recv->dccpor_elapsed_time = elapsed_time; 230 opt_recv->dccpor_elapsed_time = elapsed_time;
195 break; 231 break;
196 case DCCPO_ELAPSED_TIME: 232 case DCCPO_ELAPSED_TIME:
197 if (len != 2 && len != 4) 233 if (dccp_packet_without_ack(skb)) /* RFC 4340, 13.2 */
198 goto out_invalid_option; 234 break;
199
200 if (pkt_type == DCCP_PKT_DATA)
201 continue;
202 235
203 if (len == 2) { 236 if (len == 2) {
204 __be16 opt_val2 = get_unaligned((__be16 *)value); 237 __be16 opt_val2 = get_unaligned((__be16 *)value);
205 elapsed_time = ntohs(opt_val2); 238 elapsed_time = ntohs(opt_val2);
206 } else { 239 } else if (len == 4) {
207 opt_val = get_unaligned((__be32 *)value); 240 opt_val = get_unaligned((__be32 *)value);
208 elapsed_time = ntohl(opt_val); 241 elapsed_time = ntohl(opt_val);
242 } else {
243 goto out_invalid_option;
209 } 244 }
210 245
211 if (elapsed_time > opt_recv->dccpor_elapsed_time) 246 if (elapsed_time > opt_recv->dccpor_elapsed_time)
@@ -214,15 +249,6 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
214 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 249 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
215 dccp_role(sk), elapsed_time); 250 dccp_role(sk), elapsed_time);
216 break; 251 break;
217 /*
218 * From RFC 4340, sec. 10.3:
219 *
220 * Option numbers 128 through 191 are for
221 * options sent from the HC-Sender to the
222 * HC-Receiver; option numbers 192 through 255
223 * are for options sent from the HC-Receiver to
224 * the HC-Sender.
225 */
226 case 128 ... 191: { 252 case 128 ... 191: {
227 const u16 idx = value - options; 253 const u16 idx = value - options;
228 254
@@ -246,7 +272,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
246 "implemented, ignoring", sk, opt, len); 272 "implemented, ignoring", sk, opt, len);
247 break; 273 break;
248 } 274 }
249 275ignore_option:
250 if (opt != DCCPO_MANDATORY) 276 if (opt != DCCPO_MANDATORY)
251 mandatory = 0; 277 mandatory = 0;
252 } 278 }
@@ -382,16 +408,24 @@ int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
382 408
383EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); 409EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
384 410
385static int dccp_insert_option_timestamp_echo(struct sock *sk, 411static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
412 struct dccp_request_sock *dreq,
386 struct sk_buff *skb) 413 struct sk_buff *skb)
387{ 414{
388 struct dccp_sock *dp = dccp_sk(sk);
389 __be32 tstamp_echo; 415 __be32 tstamp_echo;
390 int len, elapsed_time_len;
391 unsigned char *to; 416 unsigned char *to;
392 const suseconds_t delta = ktime_us_delta(ktime_get_real(), 417 u32 elapsed_time, elapsed_time_len, len;
393 dp->dccps_timestamp_time); 418
394 u32 elapsed_time = delta / 10; 419 if (dreq != NULL) {
420 elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time;
421 tstamp_echo = htonl(dreq->dreq_timestamp_echo);
422 dreq->dreq_timestamp_echo = 0;
423 } else {
424 elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time;
425 tstamp_echo = htonl(dp->dccps_timestamp_echo);
426 dp->dccps_timestamp_echo = 0;
427 }
428
395 elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 429 elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
396 len = 6 + elapsed_time_len; 430 len = 6 + elapsed_time_len;
397 431
@@ -404,7 +438,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
404 *to++ = DCCPO_TIMESTAMP_ECHO; 438 *to++ = DCCPO_TIMESTAMP_ECHO;
405 *to++ = len; 439 *to++ = len;
406 440
407 tstamp_echo = htonl(dp->dccps_timestamp_echo);
408 memcpy(to, &tstamp_echo, 4); 441 memcpy(to, &tstamp_echo, 4);
409 to += 4; 442 to += 4;
410 443
@@ -416,8 +449,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
416 memcpy(to, &var32, 4); 449 memcpy(to, &var32, 4);
417 } 450 }
418 451
419 dp->dccps_timestamp_echo = 0;
420 dp->dccps_timestamp_time = ktime_set(0, 0);
421 return 0; 452 return 0;
422} 453}
423 454
@@ -510,6 +541,18 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
510 return 0; 541 return 0;
511} 542}
512 543
544/* The length of all options needs to be a multiple of 4 (5.8) */
545static void dccp_insert_option_padding(struct sk_buff *skb)
546{
547 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
548
549 if (padding != 0) {
550 padding = 4 - padding;
551 memset(skb_push(skb, padding), 0, padding);
552 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
553 }
554}
555
513int dccp_insert_options(struct sock *sk, struct sk_buff *skb) 556int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
514{ 557{
515 struct dccp_sock *dp = dccp_sk(sk); 558 struct dccp_sock *dp = dccp_sk(sk);
@@ -526,10 +569,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
526 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && 569 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
527 dccp_insert_option_ackvec(sk, skb)) 570 dccp_insert_option_ackvec(sk, skb))
528 return -1; 571 return -1;
529
530 if (dp->dccps_timestamp_echo != 0 &&
531 dccp_insert_option_timestamp_echo(sk, skb))
532 return -1;
533 } 572 }
534 573
535 if (dp->dccps_hc_rx_insert_options) { 574 if (dp->dccps_hc_rx_insert_options) {
@@ -553,18 +592,22 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
553 dccp_insert_option_timestamp(sk, skb)) 592 dccp_insert_option_timestamp(sk, skb))
554 return -1; 593 return -1;
555 594
556 /* XXX: insert other options when appropriate */ 595 if (dp->dccps_timestamp_echo != 0 &&
596 dccp_insert_option_timestamp_echo(dp, NULL, skb))
597 return -1;
598
599 dccp_insert_option_padding(skb);
600 return 0;
601}
557 602
558 if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { 603int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
559 /* The length of all options has to be a multiple of 4 */ 604{
560 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; 605 DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
561 606
562 if (padding != 0) { 607 if (dreq->dreq_timestamp_echo != 0 &&
563 padding = 4 - padding; 608 dccp_insert_option_timestamp_echo(NULL, dreq, skb))
564 memset(skb_push(skb, padding), 0, padding); 609 return -1;
565 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
566 }
567 }
568 610
611 dccp_insert_option_padding(skb);
569 return 0; 612 return 0;
570} 613}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index f49544618f20..3b763db3d863 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -133,15 +133,31 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
133 return -ENOBUFS; 133 return -ENOBUFS;
134} 134}
135 135
136/**
137 * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits
138 * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
139 * since the RX CCID is restricted to feedback packets (Acks), which are small
140 * in comparison with the data traffic. A value of 0 means "no current CCMPS".
141 */
142static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
143{
144 const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid;
145
146 if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL)
147 return 0;
148 return tx_ccid->ccid_ops->ccid_ccmps;
149}
150
136unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) 151unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
137{ 152{
138 struct inet_connection_sock *icsk = inet_csk(sk); 153 struct inet_connection_sock *icsk = inet_csk(sk);
139 struct dccp_sock *dp = dccp_sk(sk); 154 struct dccp_sock *dp = dccp_sk(sk);
140 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - 155 u32 ccmps = dccp_determine_ccmps(dp);
141 sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); 156 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
142 157
143 /* Now subtract optional transport overhead */ 158 /* Account for header lengths and IPv4/v6 option overhead */
144 mss_now -= icsk->icsk_ext_hdr_len; 159 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
160 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
145 161
146 /* 162 /*
147 * FIXME: this should come from the CCID infrastructure, where, say, 163 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -151,13 +167,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
151 * make it a multiple of 4 167 * make it a multiple of 4
152 */ 168 */
153 169
154 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; 170 cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
155 171
156 /* And store cached results */ 172 /* And store cached results */
157 icsk->icsk_pmtu_cookie = pmtu; 173 icsk->icsk_pmtu_cookie = pmtu;
158 dp->dccps_mss_cache = mss_now; 174 dp->dccps_mss_cache = cur_mps;
159 175
160 return mss_now; 176 return cur_mps;
161} 177}
162 178
163EXPORT_SYMBOL_GPL(dccp_sync_mss); 179EXPORT_SYMBOL_GPL(dccp_sync_mss);
@@ -170,7 +186,7 @@ void dccp_write_space(struct sock *sk)
170 wake_up_interruptible(sk->sk_sleep); 186 wake_up_interruptible(sk->sk_sleep);
171 /* Should agree with poll, otherwise some programs break */ 187 /* Should agree with poll, otherwise some programs break */
172 if (sock_writeable(sk)) 188 if (sock_writeable(sk))
173 sk_wake_async(sk, 2, POLL_OUT); 189 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
174 190
175 read_unlock(&sk->sk_callback_lock); 191 read_unlock(&sk->sk_callback_lock);
176} 192}
@@ -303,7 +319,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
303 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 319 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
304 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; 320 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
305 321
306 if (dccp_insert_options(sk, skb)) { 322 if (dccp_insert_options_rsk(dreq, skb)) {
307 kfree_skb(skb); 323 kfree_skb(skb);
308 return NULL; 324 return NULL;
309 } 325 }
@@ -391,7 +407,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
391 * FIXME: what if rebuild_header fails? 407 * FIXME: what if rebuild_header fails?
392 * Should we be doing a rebuild_header here? 408 * Should we be doing a rebuild_header here?
393 */ 409 */
394 int err = inet_sk_rebuild_header(sk); 410 int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);
395 411
396 if (err != 0) 412 if (err != 0)
397 return err; 413 return err;
@@ -567,14 +583,27 @@ void dccp_send_close(struct sock *sk, const int active)
567 583
568 /* Reserve space for headers and prepare control bits. */ 584 /* Reserve space for headers and prepare control bits. */
569 skb_reserve(skb, sk->sk_prot->max_header); 585 skb_reserve(skb, sk->sk_prot->max_header);
570 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 586 if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait)
571 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; 587 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
588 else
589 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
572 590
573 if (active) { 591 if (active) {
574 dccp_write_xmit(sk, 1); 592 dccp_write_xmit(sk, 1);
575 dccp_skb_entail(sk, skb); 593 dccp_skb_entail(sk, skb);
576 dccp_transmit_skb(sk, skb_clone(skb, prio)); 594 dccp_transmit_skb(sk, skb_clone(skb, prio));
577 /* FIXME do we need a retransmit timer here? */ 595 /*
596 * Retransmission timer for active-close: RFC 4340, 8.3 requires
597 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
598 * state can be left. The initial timeout is 2 RTTs.
599 * Since RTT measurement is done by the CCIDs, there is no easy
600 * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
601 * is too low (200ms); we use a high value to avoid unnecessary
602 * retransmissions when the link RTT is > 0.2 seconds.
603 * FIXME: Let main module sample RTTs and use that instead.
604 */
605 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
606 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
578 } else 607 } else
579 dccp_transmit_skb(sk, skb); 608 dccp_transmit_skb(sk, skb);
580} 609}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d84973928033..0bed4a6095b7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -60,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state)
60{ 60{
61 const int oldstate = sk->sk_state; 61 const int oldstate = sk->sk_state;
62 62
63 dccp_pr_debug("%s(%p) %-10.10s -> %s\n", 63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_role(sk), sk,
65 dccp_state_name(oldstate), dccp_state_name(state)); 64 dccp_state_name(oldstate), dccp_state_name(state));
66 WARN_ON(state == oldstate); 65 WARN_ON(state == oldstate);
67 66
@@ -72,7 +71,8 @@ void dccp_set_state(struct sock *sk, const int state)
72 break; 71 break;
73 72
74 case DCCP_CLOSED: 73 case DCCP_CLOSED:
75 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) 74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); 76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77 77
78 sk->sk_prot->unhash(sk); 78 sk->sk_prot->unhash(sk);
@@ -93,6 +93,24 @@ void dccp_set_state(struct sock *sk, const int state)
93 93
94EXPORT_SYMBOL_GPL(dccp_set_state); 94EXPORT_SYMBOL_GPL(dccp_set_state);
95 95
96static void dccp_finish_passive_close(struct sock *sk)
97{
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
103 break;
104 case DCCP_PASSIVE_CLOSEREQ:
105 /*
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 */
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
111 }
112}
113
96void dccp_done(struct sock *sk) 114void dccp_done(struct sock *sk)
97{ 115{
98 dccp_set_state(sk, DCCP_CLOSED); 116 dccp_set_state(sk, DCCP_CLOSED);
@@ -134,14 +152,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
134const char *dccp_state_name(const int state) 152const char *dccp_state_name(const int state)
135{ 153{
136 static char *dccp_state_names[] = { 154 static char *dccp_state_names[] = {
137 [DCCP_OPEN] = "OPEN", 155 [DCCP_OPEN] = "OPEN",
138 [DCCP_REQUESTING] = "REQUESTING", 156 [DCCP_REQUESTING] = "REQUESTING",
139 [DCCP_PARTOPEN] = "PARTOPEN", 157 [DCCP_PARTOPEN] = "PARTOPEN",
140 [DCCP_LISTEN] = "LISTEN", 158 [DCCP_LISTEN] = "LISTEN",
141 [DCCP_RESPOND] = "RESPOND", 159 [DCCP_RESPOND] = "RESPOND",
142 [DCCP_CLOSING] = "CLOSING", 160 [DCCP_CLOSING] = "CLOSING",
143 [DCCP_TIME_WAIT] = "TIME_WAIT", 161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
144 [DCCP_CLOSED] = "CLOSED", 162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
145 }; 166 };
146 167
147 if (state >= DCCP_MAX_STATES) 168 if (state >= DCCP_MAX_STATES)
@@ -174,6 +195,19 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 195
175 dccp_minisock_init(&dp->dccps_minisock); 196 dccp_minisock_init(&dp->dccps_minisock);
176 197
198 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
199 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
200 sk->sk_state = DCCP_CLOSED;
201 sk->sk_write_space = dccp_write_space;
202 icsk->icsk_sync_mss = dccp_sync_mss;
203 dp->dccps_mss_cache = 536;
204 dp->dccps_rate_last = jiffies;
205 dp->dccps_role = DCCP_ROLE_UNDEFINED;
206 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
207 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
208
209 dccp_init_xmit_timers(sk);
210
177 /* 211 /*
178 * FIXME: We're hardcoding the CCID, and doing this at this point makes 212 * FIXME: We're hardcoding the CCID, and doing this at this point makes
179 * the listening (master) sock get CCID control blocks, which is not 213 * the listening (master) sock get CCID control blocks, which is not
@@ -213,18 +247,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
213 INIT_LIST_HEAD(&dmsk->dccpms_conf); 247 INIT_LIST_HEAD(&dmsk->dccpms_conf);
214 } 248 }
215 249
216 dccp_init_xmit_timers(sk);
217 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
218 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
219 sk->sk_state = DCCP_CLOSED;
220 sk->sk_write_space = dccp_write_space;
221 icsk->icsk_sync_mss = dccp_sync_mss;
222 dp->dccps_mss_cache = 536;
223 dp->dccps_rate_last = jiffies;
224 dp->dccps_role = DCCP_ROLE_UNDEFINED;
225 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
226 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
227
228 return 0; 250 return 0;
229} 251}
230 252
@@ -275,6 +297,12 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
275 return inet_csk_listen_start(sk, backlog); 297 return inet_csk_listen_start(sk, backlog);
276} 298}
277 299
300static inline int dccp_need_reset(int state)
301{
302 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 state != DCCP_REQUESTING;
304}
305
278int dccp_disconnect(struct sock *sk, int flags) 306int dccp_disconnect(struct sock *sk, int flags)
279{ 307{
280 struct inet_connection_sock *icsk = inet_csk(sk); 308 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -285,10 +313,15 @@ int dccp_disconnect(struct sock *sk, int flags)
285 if (old_state != DCCP_CLOSED) 313 if (old_state != DCCP_CLOSED)
286 dccp_set_state(sk, DCCP_CLOSED); 314 dccp_set_state(sk, DCCP_CLOSED);
287 315
288 /* ABORT function of RFC793 */ 316 /*
317 * This corresponds to the ABORT function of RFC793, sec. 3.8
318 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
319 */
289 if (old_state == DCCP_LISTEN) { 320 if (old_state == DCCP_LISTEN) {
290 inet_csk_listen_stop(sk); 321 inet_csk_listen_stop(sk);
291 /* FIXME: do the active reset thing */ 322 } else if (dccp_need_reset(old_state)) {
323 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324 sk->sk_err = ECONNRESET;
292 } else if (old_state == DCCP_REQUESTING) 325 } else if (old_state == DCCP_REQUESTING)
293 sk->sk_err = ECONNRESET; 326 sk->sk_err = ECONNRESET;
294 327
@@ -518,6 +551,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518 (struct dccp_so_feat __user *) 551 (struct dccp_so_feat __user *)
519 optval); 552 optval);
520 break; 553 break;
554 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
555 if (dp->dccps_role != DCCP_ROLE_SERVER)
556 err = -EOPNOTSUPP;
557 else
558 dp->dccps_server_timewait = (val != 0);
559 break;
521 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ 560 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
522 if (val < 0 || val > 15) 561 if (val < 0 || val > 15)
523 err = -EINVAL; 562 err = -EINVAL;
@@ -618,15 +657,15 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
618 (__be32 __user *)optval, optlen); 657 (__be32 __user *)optval, optlen);
619 case DCCP_SOCKOPT_GET_CUR_MPS: 658 case DCCP_SOCKOPT_GET_CUR_MPS:
620 val = dp->dccps_mss_cache; 659 val = dp->dccps_mss_cache;
621 len = sizeof(val); 660 break;
661 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
662 val = dp->dccps_server_timewait;
622 break; 663 break;
623 case DCCP_SOCKOPT_SEND_CSCOV: 664 case DCCP_SOCKOPT_SEND_CSCOV:
624 val = dp->dccps_pcslen; 665 val = dp->dccps_pcslen;
625 len = sizeof(val);
626 break; 666 break;
627 case DCCP_SOCKOPT_RECV_CSCOV: 667 case DCCP_SOCKOPT_RECV_CSCOV:
628 val = dp->dccps_pcrlen; 668 val = dp->dccps_pcrlen;
629 len = sizeof(val);
630 break; 669 break;
631 case 128 ... 191: 670 case 128 ... 191:
632 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 671 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
@@ -638,6 +677,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
638 return -ENOPROTOOPT; 677 return -ENOPROTOOPT;
639 } 678 }
640 679
680 len = sizeof(val);
641 if (put_user(len, optlen) || copy_to_user(optval, &val, len)) 681 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
642 return -EFAULT; 682 return -EFAULT;
643 683
@@ -748,19 +788,26 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
748 788
749 dh = dccp_hdr(skb); 789 dh = dccp_hdr(skb);
750 790
751 if (dh->dccph_type == DCCP_PKT_DATA || 791 switch (dh->dccph_type) {
752 dh->dccph_type == DCCP_PKT_DATAACK) 792 case DCCP_PKT_DATA:
793 case DCCP_PKT_DATAACK:
753 goto found_ok_skb; 794 goto found_ok_skb;
754 795
755 if (dh->dccph_type == DCCP_PKT_RESET || 796 case DCCP_PKT_CLOSE:
756 dh->dccph_type == DCCP_PKT_CLOSE) { 797 case DCCP_PKT_CLOSEREQ:
757 dccp_pr_debug("found fin ok!\n"); 798 if (!(flags & MSG_PEEK))
799 dccp_finish_passive_close(sk);
800 /* fall through */
801 case DCCP_PKT_RESET:
802 dccp_pr_debug("found fin (%s) ok!\n",
803 dccp_packet_name(dh->dccph_type));
758 len = 0; 804 len = 0;
759 goto found_fin_ok; 805 goto found_fin_ok;
806 default:
807 dccp_pr_debug("packet_type=%s\n",
808 dccp_packet_name(dh->dccph_type));
809 sk_eat_skb(sk, skb, 0);
760 } 810 }
761 dccp_pr_debug("packet_type=%s\n",
762 dccp_packet_name(dh->dccph_type));
763 sk_eat_skb(sk, skb, 0);
764verify_sock_status: 811verify_sock_status:
765 if (sock_flag(sk, SOCK_DONE)) { 812 if (sock_flag(sk, SOCK_DONE)) {
766 len = 0; 813 len = 0;
@@ -862,34 +909,38 @@ out:
862 909
863EXPORT_SYMBOL_GPL(inet_dccp_listen); 910EXPORT_SYMBOL_GPL(inet_dccp_listen);
864 911
865static const unsigned char dccp_new_state[] = { 912static void dccp_terminate_connection(struct sock *sk)
866 /* current state: new state: action: */
867 [0] = DCCP_CLOSED,
868 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
869 [DCCP_REQUESTING] = DCCP_CLOSED,
870 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
871 [DCCP_LISTEN] = DCCP_CLOSED,
872 [DCCP_RESPOND] = DCCP_CLOSED,
873 [DCCP_CLOSING] = DCCP_CLOSED,
874 [DCCP_TIME_WAIT] = DCCP_CLOSED,
875 [DCCP_CLOSED] = DCCP_CLOSED,
876};
877
878static int dccp_close_state(struct sock *sk)
879{ 913{
880 const int next = dccp_new_state[sk->sk_state]; 914 u8 next_state = DCCP_CLOSED;
881 const int ns = next & DCCP_STATE_MASK;
882 915
883 if (ns != sk->sk_state) 916 switch (sk->sk_state) {
884 dccp_set_state(sk, ns); 917 case DCCP_PASSIVE_CLOSE:
918 case DCCP_PASSIVE_CLOSEREQ:
919 dccp_finish_passive_close(sk);
920 break;
921 case DCCP_PARTOPEN:
922 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
923 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
924 /* fall through */
925 case DCCP_OPEN:
926 dccp_send_close(sk, 1);
885 927
886 return next & DCCP_ACTION_FIN; 928 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
929 !dccp_sk(sk)->dccps_server_timewait)
930 next_state = DCCP_ACTIVE_CLOSEREQ;
931 else
932 next_state = DCCP_CLOSING;
933 /* fall through */
934 default:
935 dccp_set_state(sk, next_state);
936 }
887} 937}
888 938
889void dccp_close(struct sock *sk, long timeout) 939void dccp_close(struct sock *sk, long timeout)
890{ 940{
891 struct dccp_sock *dp = dccp_sk(sk); 941 struct dccp_sock *dp = dccp_sk(sk);
892 struct sk_buff *skb; 942 struct sk_buff *skb;
943 u32 data_was_unread = 0;
893 int state; 944 int state;
894 945
895 lock_sock(sk); 946 lock_sock(sk);
@@ -912,16 +963,21 @@ void dccp_close(struct sock *sk, long timeout)
912 * descriptor close, not protocol-sourced closes, because the 963 * descriptor close, not protocol-sourced closes, because the
913 *reader process may not have drained the data yet! 964 *reader process may not have drained the data yet!
914 */ 965 */
915 /* FIXME: check for unread data */
916 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 966 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
967 data_was_unread += skb->len;
917 __kfree_skb(skb); 968 __kfree_skb(skb);
918 } 969 }
919 970
920 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 971 if (data_was_unread) {
972 /* Unread data was tossed, send an appropriate Reset Code */
973 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
974 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
975 dccp_set_state(sk, DCCP_CLOSED);
976 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
921 /* Check zero linger _after_ checking for unread data. */ 977 /* Check zero linger _after_ checking for unread data. */
922 sk->sk_prot->disconnect(sk, 0); 978 sk->sk_prot->disconnect(sk, 0);
923 } else if (dccp_close_state(sk)) { 979 } else if (sk->sk_state != DCCP_CLOSED) {
924 dccp_send_close(sk, 1); 980 dccp_terminate_connection(sk);
925 } 981 }
926 982
927 sk_stream_wait_close(sk, timeout); 983 sk_stream_wait_close(sk, timeout);
@@ -948,24 +1004,6 @@ adjudge_to_death:
948 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) 1004 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
949 goto out; 1005 goto out;
950 1006
951 /*
952 * The last release_sock may have processed the CLOSE or RESET
953 * packet moving sock to CLOSED state, if not we have to fire
954 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
955 * in draft-ietf-dccp-spec-11. -acme
956 */
957 if (sk->sk_state == DCCP_CLOSING) {
958 /* FIXME: should start at 2 * RTT */
959 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
960 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
961 inet_csk(sk)->icsk_rto,
962 DCCP_RTO_MAX);
963#if 0
964 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
965 dccp_set_state(sk, DCCP_CLOSED);
966#endif
967 }
968
969 if (sk->sk_state == DCCP_CLOSED) 1007 if (sk->sk_state == DCCP_CLOSED)
970 inet_csk_destroy_sock(sk); 1008 inet_csk_destroy_sock(sk);
971 1009
@@ -981,7 +1019,7 @@ EXPORT_SYMBOL_GPL(dccp_close);
981 1019
982void dccp_shutdown(struct sock *sk, int how) 1020void dccp_shutdown(struct sock *sk, int how)
983{ 1021{
984 dccp_pr_debug("entry\n"); 1022 dccp_pr_debug("called shutdown(%x)\n", how);
985} 1023}
986 1024
987EXPORT_SYMBOL_GPL(dccp_shutdown); 1025EXPORT_SYMBOL_GPL(dccp_shutdown);
@@ -1072,11 +1110,13 @@ static int __init dccp_init(void)
1072 } 1110 }
1073 1111
1074 for (i = 0; i < dccp_hashinfo.ehash_size; i++) { 1112 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1075 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1076 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); 1113 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1077 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); 1114 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1078 } 1115 }
1079 1116
1117 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1118 goto out_free_dccp_ehash;
1119
1080 bhash_order = ehash_order; 1120 bhash_order = ehash_order;
1081 1121
1082 do { 1122 do {
@@ -1091,7 +1131,7 @@ static int __init dccp_init(void)
1091 1131
1092 if (!dccp_hashinfo.bhash) { 1132 if (!dccp_hashinfo.bhash) {
1093 DCCP_CRIT("Failed to allocate DCCP bind hash table"); 1133 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1094 goto out_free_dccp_ehash; 1134 goto out_free_dccp_locks;
1095 } 1135 }
1096 1136
1097 for (i = 0; i < dccp_hashinfo.bhash_size; i++) { 1137 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
@@ -1121,6 +1161,8 @@ out_free_dccp_mib:
1121out_free_dccp_bhash: 1161out_free_dccp_bhash:
1122 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 1162 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1123 dccp_hashinfo.bhash = NULL; 1163 dccp_hashinfo.bhash = NULL;
1164out_free_dccp_locks:
1165 inet_ehash_locks_free(&dccp_hashinfo);
1124out_free_dccp_ehash: 1166out_free_dccp_ehash:
1125 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); 1167 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1126 dccp_hashinfo.ehash = NULL; 1168 dccp_hashinfo.ehash = NULL;
@@ -1139,6 +1181,7 @@ static void __exit dccp_fini(void)
1139 free_pages((unsigned long)dccp_hashinfo.ehash, 1181 free_pages((unsigned long)dccp_hashinfo.ehash,
1140 get_order(dccp_hashinfo.ehash_size * 1182 get_order(dccp_hashinfo.ehash_size *
1141 sizeof(struct inet_ehash_bucket))); 1183 sizeof(struct inet_ehash_bucket)));
1184 inet_ehash_locks_free(&dccp_hashinfo);
1142 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1185 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1143 dccp_ackvec_exit(); 1186 dccp_ackvec_exit();
1144 dccp_sysctl_exit(); 1187 dccp_sysctl_exit();
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c62c05039f69..21295993fdb8 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -100,41 +100,19 @@ static struct ctl_table dccp_default_table[] = {
100 { .ctl_name = 0, } 100 { .ctl_name = 0, }
101}; 101};
102 102
103static struct ctl_table dccp_table[] = { 103static struct ctl_path dccp_path[] = {
104 { 104 { .procname = "net", .ctl_name = CTL_NET, },
105 .ctl_name = NET_DCCP_DEFAULT, 105 { .procname = "dccp", .ctl_name = NET_DCCP, },
106 .procname = "default", 106 { .procname = "default", .ctl_name = NET_DCCP_DEFAULT, },
107 .mode = 0555, 107 { }
108 .child = dccp_default_table,
109 },
110 { .ctl_name = 0, },
111};
112
113static struct ctl_table dccp_dir_table[] = {
114 {
115 .ctl_name = NET_DCCP,
116 .procname = "dccp",
117 .mode = 0555,
118 .child = dccp_table,
119 },
120 { .ctl_name = 0, },
121};
122
123static struct ctl_table dccp_root_table[] = {
124 {
125 .ctl_name = CTL_NET,
126 .procname = "net",
127 .mode = 0555,
128 .child = dccp_dir_table,
129 },
130 { .ctl_name = 0, },
131}; 108};
132 109
133static struct ctl_table_header *dccp_table_header; 110static struct ctl_table_header *dccp_table_header;
134 111
135int __init dccp_sysctl_init(void) 112int __init dccp_sysctl_init(void)
136{ 113{
137 dccp_table_header = register_sysctl_table(dccp_root_table); 114 dccp_table_header = register_sysctl_paths(dccp_path,
115 dccp_default_table);
138 116
139 return dccp_table_header != NULL ? 0 : -ENOMEM; 117 return dccp_table_header != NULL ? 0 : -ENOMEM;
140} 118}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3af067354bd4..8703a792b560 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,9 +280,8 @@ static void dccp_init_write_xmit_timer(struct sock *sk)
280{ 280{
281 struct dccp_sock *dp = dccp_sk(sk); 281 struct dccp_sock *dp = dccp_sk(sk);
282 282
283 init_timer(&dp->dccps_xmit_timer); 283 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
284 dp->dccps_xmit_timer.data = (unsigned long)sk; 284 (unsigned long)sk);
285 dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
286} 285}
287 286
288void dccp_init_xmit_timers(struct sock *sk) 287void dccp_init_xmit_timers(struct sock *sk)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index aabe98d9402f..acd48ee522d6 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -474,7 +474,7 @@ static struct proto dn_proto = {
474static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) 474static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
475{ 475{
476 struct dn_scp *scp; 476 struct dn_scp *scp;
477 struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); 477 struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto);
478 478
479 if (!sk) 479 if (!sk)
480 goto out; 480 goto out;
@@ -1904,7 +1904,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
1904 struct sk_buff *skb = sock_alloc_send_skb(sk, datalen, 1904 struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
1905 noblock, errcode); 1905 noblock, errcode);
1906 if (skb) { 1906 if (skb) {
1907 skb->protocol = __constant_htons(ETH_P_DNA_RT); 1907 skb->protocol = htons(ETH_P_DNA_RT);
1908 skb->pkt_type = PACKET_OUTGOING; 1908 skb->pkt_type = PACKET_OUTGOING;
1909 } 1909 }
1910 return skb; 1910 return skb;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 26130afd8029..1bbfce5f7a2d 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -173,10 +173,6 @@ static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
173static struct dn_dev_sysctl_table { 173static struct dn_dev_sysctl_table {
174 struct ctl_table_header *sysctl_header; 174 struct ctl_table_header *sysctl_header;
175 ctl_table dn_dev_vars[5]; 175 ctl_table dn_dev_vars[5];
176 ctl_table dn_dev_dev[2];
177 ctl_table dn_dev_conf_dir[2];
178 ctl_table dn_dev_proto_dir[2];
179 ctl_table dn_dev_root_dir[2];
180} dn_dev_sysctl = { 176} dn_dev_sysctl = {
181 NULL, 177 NULL,
182 { 178 {
@@ -224,30 +220,6 @@ static struct dn_dev_sysctl_table {
224 }, 220 },
225 {0} 221 {0}
226 }, 222 },
227 {{
228 .ctl_name = 0,
229 .procname = "",
230 .mode = 0555,
231 .child = dn_dev_sysctl.dn_dev_vars
232 }, {0}},
233 {{
234 .ctl_name = NET_DECNET_CONF,
235 .procname = "conf",
236 .mode = 0555,
237 .child = dn_dev_sysctl.dn_dev_dev
238 }, {0}},
239 {{
240 .ctl_name = NET_DECNET,
241 .procname = "decnet",
242 .mode = 0555,
243 .child = dn_dev_sysctl.dn_dev_conf_dir
244 }, {0}},
245 {{
246 .ctl_name = CTL_NET,
247 .procname = "net",
248 .mode = 0555,
249 .child = dn_dev_sysctl.dn_dev_proto_dir
250 }, {0}}
251}; 223};
252 224
253static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) 225static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
@@ -255,6 +227,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
255 struct dn_dev_sysctl_table *t; 227 struct dn_dev_sysctl_table *t;
256 int i; 228 int i;
257 229
230#define DN_CTL_PATH_DEV 3
231
232 struct ctl_path dn_ctl_path[] = {
233 { .procname = "net", .ctl_name = CTL_NET, },
234 { .procname = "decnet", .ctl_name = NET_DECNET, },
235 { .procname = "conf", .ctl_name = NET_DECNET_CONF, },
236 { /* to be set */ },
237 { },
238 };
239
258 t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL); 240 t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
259 if (t == NULL) 241 if (t == NULL)
260 return; 242 return;
@@ -265,20 +247,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
265 } 247 }
266 248
267 if (dev) { 249 if (dev) {
268 t->dn_dev_dev[0].procname = dev->name; 250 dn_ctl_path[DN_CTL_PATH_DEV].procname = dev->name;
269 t->dn_dev_dev[0].ctl_name = dev->ifindex; 251 dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = dev->ifindex;
270 } else { 252 } else {
271 t->dn_dev_dev[0].procname = parms->name; 253 dn_ctl_path[DN_CTL_PATH_DEV].procname = parms->name;
272 t->dn_dev_dev[0].ctl_name = parms->ctl_name; 254 dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = parms->ctl_name;
273 } 255 }
274 256
275 t->dn_dev_dev[0].child = t->dn_dev_vars;
276 t->dn_dev_conf_dir[0].child = t->dn_dev_dev;
277 t->dn_dev_proto_dir[0].child = t->dn_dev_conf_dir;
278 t->dn_dev_root_dir[0].child = t->dn_dev_proto_dir;
279 t->dn_dev_vars[0].extra1 = (void *)dev; 257 t->dn_dev_vars[0].extra1 = (void *)dev;
280 258
281 t->sysctl_header = register_sysctl_table(t->dn_dev_root_dir); 259 t->sysctl_header = register_sysctl_paths(dn_ctl_path, t->dn_dev_vars);
282 if (t->sysctl_header == NULL) 260 if (t->sysctl_header == NULL)
283 kfree(t); 261 kfree(t);
284 else 262 else
@@ -647,20 +625,26 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
647 625
648static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 626static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
649{ 627{
628 struct net *net = skb->sk->sk_net;
650 struct nlattr *tb[IFA_MAX+1]; 629 struct nlattr *tb[IFA_MAX+1];
651 struct dn_dev *dn_db; 630 struct dn_dev *dn_db;
652 struct ifaddrmsg *ifm; 631 struct ifaddrmsg *ifm;
653 struct dn_ifaddr *ifa, **ifap; 632 struct dn_ifaddr *ifa, **ifap;
654 int err = -EADDRNOTAVAIL; 633 int err = -EINVAL;
634
635 if (net != &init_net)
636 goto errout;
655 637
656 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); 638 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
657 if (err < 0) 639 if (err < 0)
658 goto errout; 640 goto errout;
659 641
642 err = -ENODEV;
660 ifm = nlmsg_data(nlh); 643 ifm = nlmsg_data(nlh);
661 if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) 644 if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
662 goto errout; 645 goto errout;
663 646
647 err = -EADDRNOTAVAIL;
664 for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { 648 for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) {
665 if (tb[IFA_LOCAL] && 649 if (tb[IFA_LOCAL] &&
666 nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) 650 nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
@@ -679,6 +663,7 @@ errout:
679 663
680static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 664static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
681{ 665{
666 struct net *net = skb->sk->sk_net;
682 struct nlattr *tb[IFA_MAX+1]; 667 struct nlattr *tb[IFA_MAX+1];
683 struct net_device *dev; 668 struct net_device *dev;
684 struct dn_dev *dn_db; 669 struct dn_dev *dn_db;
@@ -686,6 +671,9 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
686 struct dn_ifaddr *ifa; 671 struct dn_ifaddr *ifa;
687 int err; 672 int err;
688 673
674 if (net != &init_net)
675 return -EINVAL;
676
689 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); 677 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
690 if (err < 0) 678 if (err < 0)
691 return err; 679 return err;
@@ -783,19 +771,23 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
783 kfree_skb(skb); 771 kfree_skb(skb);
784 goto errout; 772 goto errout;
785 } 773 }
786 err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); 774 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
787errout: 775errout:
788 if (err < 0) 776 if (err < 0)
789 rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); 777 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
790} 778}
791 779
792static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 780static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
793{ 781{
782 struct net *net = skb->sk->sk_net;
794 int idx, dn_idx = 0, skip_ndevs, skip_naddr; 783 int idx, dn_idx = 0, skip_ndevs, skip_naddr;
795 struct net_device *dev; 784 struct net_device *dev;
796 struct dn_dev *dn_db; 785 struct dn_dev *dn_db;
797 struct dn_ifaddr *ifa; 786 struct dn_ifaddr *ifa;
798 787
788 if (net != &init_net)
789 return 0;
790
799 skip_ndevs = cb->args[0]; 791 skip_ndevs = cb->args[0];
800 skip_naddr = cb->args[1]; 792 skip_naddr = cb->args[1];
801 793
@@ -1439,7 +1431,7 @@ static const struct file_operations dn_dev_seq_fops = {
1439 1431
1440#endif /* CONFIG_PROC_FS */ 1432#endif /* CONFIG_PROC_FS */
1441 1433
1442static int __initdata addr[2]; 1434static int addr[2];
1443module_param_array(addr, int, NULL, 0444); 1435module_param_array(addr, int, NULL, 0444);
1444MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node"); 1436MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
1445 1437
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3760a20d10d0..4aa9a423e606 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -203,8 +203,6 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
203 struct flowi fl; 203 struct flowi fl;
204 struct dn_fib_res res; 204 struct dn_fib_res res;
205 205
206 memset(&fl, 0, sizeof(fl));
207
208 if (nh->nh_flags&RTNH_F_ONLINK) { 206 if (nh->nh_flags&RTNH_F_ONLINK) {
209 struct net_device *dev; 207 struct net_device *dev;
210 208
@@ -506,10 +504,14 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
506 504
507static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 505static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
508{ 506{
507 struct net *net = skb->sk->sk_net;
509 struct dn_fib_table *tb; 508 struct dn_fib_table *tb;
510 struct rtattr **rta = arg; 509 struct rtattr **rta = arg;
511 struct rtmsg *r = NLMSG_DATA(nlh); 510 struct rtmsg *r = NLMSG_DATA(nlh);
512 511
512 if (net != &init_net)
513 return -EINVAL;
514
513 if (dn_fib_check_attr(r, rta)) 515 if (dn_fib_check_attr(r, rta))
514 return -EINVAL; 516 return -EINVAL;
515 517
@@ -522,10 +524,14 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
522 524
523static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 525static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
524{ 526{
527 struct net *net = skb->sk->sk_net;
525 struct dn_fib_table *tb; 528 struct dn_fib_table *tb;
526 struct rtattr **rta = arg; 529 struct rtattr **rta = arg;
527 struct rtmsg *r = NLMSG_DATA(nlh); 530 struct rtmsg *r = NLMSG_DATA(nlh);
528 531
532 if (net != &init_net)
533 return -EINVAL;
534
529 if (dn_fib_check_attr(r, rta)) 535 if (dn_fib_check_attr(r, rta))
530 return -EINVAL; 536 return -EINVAL;
531 537
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index e851b143cca3..1ca13b17974d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -580,8 +580,8 @@ static const struct seq_operations dn_neigh_seq_ops = {
580 580
581static int dn_neigh_seq_open(struct inode *inode, struct file *file) 581static int dn_neigh_seq_open(struct inode *inode, struct file *file)
582{ 582{
583 return seq_open_private(file, &dn_neigh_seq_ops, 583 return seq_open_net(inode, file, &dn_neigh_seq_ops,
584 sizeof(struct neigh_seq_state)); 584 sizeof(struct neigh_seq_state));
585} 585}
586 586
587static const struct file_operations dn_neigh_seq_fops = { 587static const struct file_operations dn_neigh_seq_fops = {
@@ -589,7 +589,7 @@ static const struct file_operations dn_neigh_seq_fops = {
589 .open = dn_neigh_seq_open, 589 .open = dn_neigh_seq_open,
590 .read = seq_read, 590 .read = seq_read,
591 .llseek = seq_lseek, 591 .llseek = seq_lseek,
592 .release = seq_release_private, 592 .release = seq_release_net,
593}; 593};
594 594
595#endif 595#endif
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 7404653880b0..1964faf203e4 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -124,7 +124,7 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
124 if ((skb = alloc_skb(size + hdr, pri)) == NULL) 124 if ((skb = alloc_skb(size + hdr, pri)) == NULL)
125 return NULL; 125 return NULL;
126 126
127 skb->protocol = __constant_htons(ETH_P_DNA_RT); 127 skb->protocol = htons(ETH_P_DNA_RT);
128 skb->pkt_type = PACKET_OUTGOING; 128 skb->pkt_type = PACKET_OUTGOING;
129 129
130 if (sk) 130 if (sk)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 97eee5e8fbbe..31be29b8b5a3 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -107,7 +107,7 @@ static const int dn_rt_mtu_expires = 10 * 60 * HZ;
107 107
108static unsigned long dn_rt_deadline; 108static unsigned long dn_rt_deadline;
109 109
110static int dn_dst_gc(void); 110static int dn_dst_gc(struct dst_ops *ops);
111static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 111static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
112static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 112static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
113static void dn_dst_link_failure(struct sk_buff *); 113static void dn_dst_link_failure(struct sk_buff *);
@@ -185,7 +185,7 @@ static void dn_dst_check_expire(unsigned long dummy)
185 mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ); 185 mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
186} 186}
187 187
188static int dn_dst_gc(void) 188static int dn_dst_gc(struct dst_ops *ops)
189{ 189{
190 struct dn_route *rt, **rtp; 190 struct dn_route *rt, **rtp;
191 int i; 191 int i;
@@ -293,9 +293,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
293 dn_rt_hash_table[hash].chain); 293 dn_rt_hash_table[hash].chain);
294 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 294 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
295 295
296 rth->u.dst.__use++; 296 dst_use(&rth->u.dst, now);
297 dst_hold(&rth->u.dst);
298 rth->u.dst.lastuse = now;
299 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 297 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
300 298
301 dnrt_drop(rt); 299 dnrt_drop(rt);
@@ -308,9 +306,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
308 rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); 306 rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain);
309 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 307 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
310 308
311 dst_hold(&rt->u.dst); 309 dst_use(&rt->u.dst, now);
312 rt->u.dst.__use++;
313 rt->u.dst.lastuse = now;
314 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 310 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
315 *rp = rt; 311 *rp = rt;
316 return 0; 312 return 0;
@@ -769,17 +765,6 @@ drop:
769} 765}
770 766
771/* 767/*
772 * Drop packet. This is used for endnodes and for
773 * when we should not be forwarding packets from
774 * this dest.
775 */
776static int dn_blackhole(struct sk_buff *skb)
777{
778 kfree_skb(skb);
779 return NET_RX_DROP;
780}
781
782/*
783 * Used to catch bugs. This should never normally get 768 * Used to catch bugs. This should never normally get
784 * called. 769 * called.
785 */ 770 */
@@ -999,7 +984,7 @@ source_ok:
999 * here 984 * here
1000 */ 985 */
1001 if (!try_hard) { 986 if (!try_hard) {
1002 neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); 987 neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
1003 if (neigh) { 988 if (neigh) {
1004 if ((oldflp->oif && 989 if ((oldflp->oif &&
1005 (neigh->dev->ifindex != oldflp->oif)) || 990 (neigh->dev->ifindex != oldflp->oif)) ||
@@ -1182,9 +1167,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
1182 (flp->mark == rt->fl.mark) && 1167 (flp->mark == rt->fl.mark) &&
1183 (rt->fl.iif == 0) && 1168 (rt->fl.iif == 0) &&
1184 (rt->fl.oif == flp->oif)) { 1169 (rt->fl.oif == flp->oif)) {
1185 rt->u.dst.lastuse = jiffies; 1170 dst_use(&rt->u.dst, jiffies);
1186 dst_hold(&rt->u.dst);
1187 rt->u.dst.__use++;
1188 rcu_read_unlock_bh(); 1171 rcu_read_unlock_bh();
1189 *pprt = &rt->u.dst; 1172 *pprt = &rt->u.dst;
1190 return 0; 1173 return 0;
@@ -1213,7 +1196,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
1213 1196
1214 err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); 1197 err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
1215 if (err == 0 && fl->proto) { 1198 if (err == 0 && fl->proto) {
1216 err = xfrm_lookup(pprt, fl, sk, !(flags & MSG_DONTWAIT)); 1199 err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ?
1200 0 : XFRM_LOOKUP_WAIT);
1217 } 1201 }
1218 return err; 1202 return err;
1219} 1203}
@@ -1402,7 +1386,7 @@ make_route:
1402 default: 1386 default:
1403 case RTN_UNREACHABLE: 1387 case RTN_UNREACHABLE:
1404 case RTN_BLACKHOLE: 1388 case RTN_BLACKHOLE:
1405 rt->u.dst.input = dn_blackhole; 1389 rt->u.dst.input = dst_discard;
1406 } 1390 }
1407 rt->rt_flags = flags; 1391 rt->rt_flags = flags;
1408 if (rt->u.dst.dev) 1392 if (rt->u.dst.dev)
@@ -1456,9 +1440,7 @@ int dn_route_input(struct sk_buff *skb)
1456 (rt->fl.oif == 0) && 1440 (rt->fl.oif == 0) &&
1457 (rt->fl.mark == skb->mark) && 1441 (rt->fl.mark == skb->mark) &&
1458 (rt->fl.iif == cb->iif)) { 1442 (rt->fl.iif == cb->iif)) {
1459 rt->u.dst.lastuse = jiffies; 1443 dst_use(&rt->u.dst, jiffies);
1460 dst_hold(&rt->u.dst);
1461 rt->u.dst.__use++;
1462 rcu_read_unlock(); 1444 rcu_read_unlock();
1463 skb->dst = (struct dst_entry *)rt; 1445 skb->dst = (struct dst_entry *)rt;
1464 return 0; 1446 return 0;
@@ -1530,6 +1512,7 @@ rtattr_failure:
1530 */ 1512 */
1531static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 1513static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
1532{ 1514{
1515 struct net *net = in_skb->sk->sk_net;
1533 struct rtattr **rta = arg; 1516 struct rtattr **rta = arg;
1534 struct rtmsg *rtm = NLMSG_DATA(nlh); 1517 struct rtmsg *rtm = NLMSG_DATA(nlh);
1535 struct dn_route *rt = NULL; 1518 struct dn_route *rt = NULL;
@@ -1538,6 +1521,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1538 struct sk_buff *skb; 1521 struct sk_buff *skb;
1539 struct flowi fl; 1522 struct flowi fl;
1540 1523
1524 if (net != &init_net)
1525 return -EINVAL;
1526
1541 memset(&fl, 0, sizeof(fl)); 1527 memset(&fl, 0, sizeof(fl));
1542 fl.proto = DNPROTO_NSP; 1528 fl.proto = DNPROTO_NSP;
1543 1529
@@ -1565,7 +1551,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1565 kfree_skb(skb); 1551 kfree_skb(skb);
1566 return -ENODEV; 1552 return -ENODEV;
1567 } 1553 }
1568 skb->protocol = __constant_htons(ETH_P_DNA_RT); 1554 skb->protocol = htons(ETH_P_DNA_RT);
1569 skb->dev = dev; 1555 skb->dev = dev;
1570 cb->src = fl.fld_src; 1556 cb->src = fl.fld_src;
1571 cb->dst = fl.fld_dst; 1557 cb->dst = fl.fld_dst;
@@ -1602,7 +1588,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1602 goto out_free; 1588 goto out_free;
1603 } 1589 }
1604 1590
1605 return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 1591 return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
1606 1592
1607out_free: 1593out_free:
1608 kfree_skb(skb); 1594 kfree_skb(skb);
@@ -1615,10 +1601,14 @@ out_free:
1615 */ 1601 */
1616int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) 1602int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
1617{ 1603{
1604 struct net *net = skb->sk->sk_net;
1618 struct dn_route *rt; 1605 struct dn_route *rt;
1619 int h, s_h; 1606 int h, s_h;
1620 int idx, s_idx; 1607 int idx, s_idx;
1621 1608
1609 if (net != &init_net)
1610 return 0;
1611
1622 if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) 1612 if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
1623 return -EINVAL; 1613 return -EINVAL;
1624 if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) 1614 if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
@@ -1673,12 +1663,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
1673 break; 1663 break;
1674 rcu_read_unlock_bh(); 1664 rcu_read_unlock_bh();
1675 } 1665 }
1676 return rt; 1666 return rcu_dereference(rt);
1677} 1667}
1678 1668
1679static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) 1669static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
1680{ 1670{
1681 struct dn_rt_cache_iter_state *s = rcu_dereference(seq->private); 1671 struct dn_rt_cache_iter_state *s = seq->private;
1682 1672
1683 rt = rt->u.dst.dn_next; 1673 rt = rt->u.dst.dn_next;
1684 while(!rt) { 1674 while(!rt) {
@@ -1688,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
1688 rcu_read_lock_bh(); 1678 rcu_read_lock_bh();
1689 rt = dn_rt_hash_table[s->bucket].chain; 1679 rt = dn_rt_hash_table[s->bucket].chain;
1690 } 1680 }
1691 return rt; 1681 return rcu_dereference(rt);
1692} 1682}
1693 1683
1694static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 1684static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1760,8 +1750,7 @@ void __init dn_route_init(void)
1760 dn_dst_ops.kmem_cachep = 1750 dn_dst_ops.kmem_cachep =
1761 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1751 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1762 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1752 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1763 init_timer(&dn_route_timer); 1753 setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
1764 dn_route_timer.function = dn_dst_check_expire;
1765 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1754 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1766 add_timer(&dn_route_timer); 1755 add_timer(&dn_route_timer);
1767 1756
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index ddd3f04f0919..5b7539b7fe0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -48,15 +48,6 @@ struct dn_fib_rule
48 u8 flags; 48 u8 flags;
49}; 49};
50 50
51static struct dn_fib_rule default_rule = {
52 .common = {
53 .refcnt = ATOMIC_INIT(2),
54 .pref = 0x7fff,
55 .table = RT_TABLE_MAIN,
56 .action = FR_ACT_TO_TBL,
57 },
58};
59
60 51
61int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) 52int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
62{ 53{
@@ -221,7 +212,7 @@ nla_put_failure:
221 return -ENOBUFS; 212 return -ENOBUFS;
222} 213}
223 214
224static u32 dn_fib_rule_default_pref(void) 215static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
225{ 216{
226 struct list_head *pos; 217 struct list_head *pos;
227 struct fib_rule *rule; 218 struct fib_rule *rule;
@@ -258,12 +249,13 @@ static struct fib_rules_ops dn_fib_rules_ops = {
258 .policy = dn_fib_rule_policy, 249 .policy = dn_fib_rule_policy,
259 .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), 250 .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list),
260 .owner = THIS_MODULE, 251 .owner = THIS_MODULE,
252 .fro_net = &init_net,
261}; 253};
262 254
263void __init dn_fib_rules_init(void) 255void __init dn_fib_rules_init(void)
264{ 256{
265 list_add_tail(&default_rule.common.list, 257 BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff,
266 &dn_fib_rules_ops.rules_list); 258 RT_TABLE_MAIN, 0));
267 fib_rules_register(&dn_fib_rules_ops); 259 fib_rules_register(&dn_fib_rules_ops);
268} 260}
269 261
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index fda0772fa215..e09d915dbd77 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,10 +375,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
375 kfree_skb(skb); 375 kfree_skb(skb);
376 goto errout; 376 goto errout;
377 } 377 }
378 err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); 378 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
379errout: 379errout:
380 if (err < 0) 380 if (err < 0)
381 rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); 381 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
382} 382}
383 383
384static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, 384static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
@@ -463,12 +463,16 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
463 463
464int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) 464int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
465{ 465{
466 struct net *net = skb->sk->sk_net;
466 unsigned int h, s_h; 467 unsigned int h, s_h;
467 unsigned int e = 0, s_e; 468 unsigned int e = 0, s_e;
468 struct dn_fib_table *tb; 469 struct dn_fib_table *tb;
469 struct hlist_node *node; 470 struct hlist_node *node;
470 int dumped = 0; 471 int dumped = 0;
471 472
473 if (net != &init_net)
474 return 0;
475
472 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 476 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
473 ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) 477 ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
474 return dn_cache_dump(skb, cb); 478 return dn_cache_dump(skb, cb);
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
index ecdb3f9f14ca..2f81de5e752f 100644
--- a/net/decnet/netfilter/Kconfig
+++ b/net/decnet/netfilter/Kconfig
@@ -4,6 +4,7 @@
4 4
5menu "DECnet: Netfilter Configuration" 5menu "DECnet: Netfilter Configuration"
6 depends on DECNET && NETFILTER && EXPERIMENTAL 6 depends on DECNET && NETFILTER && EXPERIMENTAL
7 depends on NETFILTER_ADVANCED
7 8
8config DECNET_NF_GRABULATOR 9config DECNET_NF_GRABULATOR
9 tristate "Routing message grabulator (for userland routing daemon)" 10 tristate "Routing message grabulator (for userland routing daemon)"
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 43fcd29046d1..6d2bd3202048 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
115 RCV_SKB_FAIL(-EINVAL); 115 RCV_SKB_FAIL(-EINVAL);
116} 116}
117 117
118static struct nf_hook_ops dnrmg_ops = { 118static struct nf_hook_ops dnrmg_ops __read_mostly = {
119 .hook = dnrmg_hook, 119 .hook = dnrmg_hook,
120 .pf = PF_DECnet, 120 .pf = PF_DECnet,
121 .hooknum = NF_DN_ROUTE, 121 .hooknum = NF_DN_ROUTE,
@@ -137,7 +137,7 @@ static int __init dn_rtmsg_init(void)
137 137
138 rv = nf_register_hook(&dnrmg_ops); 138 rv = nf_register_hook(&dnrmg_ops);
139 if (rv) { 139 if (rv) {
140 sock_release(dnrmg->sk_socket); 140 netlink_kernel_release(dnrmg);
141 } 141 }
142 142
143 return rv; 143 return rv;
@@ -146,7 +146,7 @@ static int __init dn_rtmsg_init(void)
146static void __exit dn_rtmsg_fini(void) 146static void __exit dn_rtmsg_fini(void)
147{ 147{
148 nf_unregister_hook(&dnrmg_ops); 148 nf_unregister_hook(&dnrmg_ops);
149 sock_release(dnrmg->sk_socket); 149 netlink_kernel_release(dnrmg);
150} 150}
151 151
152 152
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index ae354a43fb97..228067c571ba 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -470,28 +470,15 @@ static ctl_table dn_table[] = {
470 {0} 470 {0}
471}; 471};
472 472
473static ctl_table dn_dir_table[] = { 473static struct ctl_path dn_path[] = {
474 { 474 { .procname = "net", .ctl_name = CTL_NET, },
475 .ctl_name = NET_DECNET, 475 { .procname = "decnet", .ctl_name = NET_DECNET, },
476 .procname = "decnet", 476 { }
477 .mode = 0555,
478 .child = dn_table},
479 {0}
480};
481
482static ctl_table dn_root_table[] = {
483 {
484 .ctl_name = CTL_NET,
485 .procname = "net",
486 .mode = 0555,
487 .child = dn_dir_table
488 },
489 {0}
490}; 477};
491 478
492void dn_register_sysctl(void) 479void dn_register_sysctl(void)
493{ 480{
494 dn_table_header = register_sysctl_table(dn_root_table); 481 dn_table_header = register_sysctl_paths(dn_path, dn_table);
495} 482}
496 483
497void dn_unregister_sysctl(void) 484void dn_unregister_sysctl(void)
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 9cae16b4e0b7..bc0f6252613f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -624,7 +624,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol)
624 sock->state = SS_UNCONNECTED; 624 sock->state = SS_UNCONNECTED;
625 625
626 err = -ENOBUFS; 626 err = -ENOBUFS;
627 sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); 627 sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto);
628 if (sk == NULL) 628 if (sk == NULL)
629 goto out; 629 goto out;
630 630
@@ -1014,9 +1014,8 @@ static int __init aun_udp_initialise(void)
1014 1014
1015 skb_queue_head_init(&aun_queue); 1015 skb_queue_head_init(&aun_queue);
1016 spin_lock_init(&aun_queue_lock); 1016 spin_lock_init(&aun_queue_lock);
1017 init_timer(&ab_cleanup_timer); 1017 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1018 ab_cleanup_timer.expires = jiffies + (HZ*2); 1018 ab_cleanup_timer.expires = jiffies + (HZ*2);
1019 ab_cleanup_timer.function = ab_cleanup;
1020 add_timer(&ab_cleanup_timer); 1019 add_timer(&ab_cleanup_timer);
1021 1020
1022 memset(&sin, 0, sizeof(sin)); 1021 memset(&sin, 0, sizeof(sin));
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 6b2e454ae313..a7b417523e9b 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -359,10 +359,34 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
359} 359}
360EXPORT_SYMBOL(alloc_etherdev_mq); 360EXPORT_SYMBOL(alloc_etherdev_mq);
361 361
362char *print_mac(char *buf, const u8 *addr) 362static size_t _format_mac_addr(char *buf, int buflen,
363 const unsigned char *addr, int len)
363{ 364{
364 sprintf(buf, MAC_FMT, 365 int i;
365 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); 366 char *cp = buf;
367
368 for (i = 0; i < len; i++) {
369 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
370 if (i == len - 1)
371 break;
372 cp += strlcpy(cp, ":", buflen - (cp - buf));
373 }
374 return cp - buf;
375}
376
377ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
378{
379 size_t l;
380
381 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
382 l += strlcpy(buf + l, "\n", PAGE_SIZE - l);
383 return ((ssize_t) l);
384}
385EXPORT_SYMBOL(sysfs_format_mac);
386
387char *print_mac(char *buf, const unsigned char *addr)
388{
389 _format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN);
366 return buf; 390 return buf;
367} 391}
368EXPORT_SYMBOL(print_mac); 392EXPORT_SYMBOL(print_mac);
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 1438adedbc83..bd501046c9c0 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -1,8 +1,9 @@
1config IEEE80211 1config IEEE80211
2 tristate "Generic IEEE 802.11 Networking Stack" 2 tristate "Generic IEEE 802.11 Networking Stack (DEPRECATED)"
3 ---help--- 3 ---help---
4 This option enables the hardware independent IEEE 802.11 4 This option enables the hardware independent IEEE 802.11
5 networking stack. 5 networking stack. This component is deprecated in favor of the
6 mac80211 component.
6 7
7config IEEE80211_DEBUG 8config IEEE80211_DEBUG
8 bool "Enable full debugging output" 9 bool "Enable full debugging output"
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 0936a3e0210b..208bf35b5546 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -25,7 +25,6 @@
25#include <net/ieee80211.h> 25#include <net/ieee80211.h>
26 26
27#include <linux/crypto.h> 27#include <linux/crypto.h>
28#include <asm/scatterlist.h>
29 28
30MODULE_AUTHOR("Jouni Malinen"); 29MODULE_AUTHOR("Jouni Malinen");
31MODULE_DESCRIPTION("Host AP crypt: CCMP"); 30MODULE_DESCRIPTION("Host AP crypt: CCMP");
@@ -339,7 +338,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
339 pos += 8; 338 pos += 8;
340 339
341 if (ccmp_replay_check(pn, key->rx_pn)) { 340 if (ccmp_replay_check(pn, key->rx_pn)) {
342 if (net_ratelimit()) { 341 if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
343 IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s " 342 IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s "
344 "previous PN %02x%02x%02x%02x%02x%02x " 343 "previous PN %02x%02x%02x%02x%02x%02x "
345 "received PN %02x%02x%02x%02x%02x%02x\n", 344 "received PN %02x%02x%02x%02x%02x%02x\n",
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 4cce3534e408..bba0152e2d71 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -25,7 +25,6 @@
25#include <net/ieee80211.h> 25#include <net/ieee80211.h>
26 26
27#include <linux/crypto.h> 27#include <linux/crypto.h>
28#include <linux/scatterlist.h>
29#include <linux/crc32.h> 28#include <linux/crc32.h>
30 29
31MODULE_AUTHOR("Jouni Malinen"); 30MODULE_AUTHOR("Jouni Malinen");
@@ -190,7 +189,7 @@ static inline u16 Mk16(u8 hi, u8 lo)
190 return lo | (((u16) hi) << 8); 189 return lo | (((u16) hi) << 8);
191} 190}
192 191
193static inline u16 Mk16_le(u16 * v) 192static inline u16 Mk16_le(__le16 * v)
194{ 193{
195 return le16_to_cpu(*v); 194 return le16_to_cpu(*v);
196} 195}
@@ -276,15 +275,15 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
276 PPK[5] = TTAK[4] + IV16; 275 PPK[5] = TTAK[4] + IV16;
277 276
278 /* Step 2 - 96-bit bijective mixing using S-box */ 277 /* Step 2 - 96-bit bijective mixing using S-box */
279 PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) & TK[0])); 278 PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
280 PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) & TK[2])); 279 PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
281 PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) & TK[4])); 280 PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
282 PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) & TK[6])); 281 PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
283 PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) & TK[8])); 282 PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
284 PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) & TK[10])); 283 PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
285 284
286 PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) & TK[12])); 285 PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
287 PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) & TK[14])); 286 PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
288 PPK[2] += RotR1(PPK[1]); 287 PPK[2] += RotR1(PPK[1]);
289 PPK[3] += RotR1(PPK[2]); 288 PPK[3] += RotR1(PPK[2]);
290 PPK[4] += RotR1(PPK[3]); 289 PPK[4] += RotR1(PPK[3]);
@@ -295,7 +294,7 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
295 WEPSeed[0] = Hi8(IV16); 294 WEPSeed[0] = Hi8(IV16);
296 WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; 295 WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
297 WEPSeed[2] = Lo8(IV16); 296 WEPSeed[2] = Lo8(IV16);
298 WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) & TK[0])) >> 1); 297 WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
299 298
300#ifdef __BIG_ENDIAN 299#ifdef __BIG_ENDIAN
301 { 300 {
@@ -465,7 +464,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
465 pos += 8; 464 pos += 8;
466 465
467 if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { 466 if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
468 if (net_ratelimit()) { 467 if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
469 IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s" 468 IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s"
470 " previous TSC %08x%04x received TSC " 469 " previous TSC %08x%04x received TSC "
471 "%08x%04x\n", print_mac(mac, hdr->addr2), 470 "%08x%04x\n", print_mac(mac, hdr->addr2),
@@ -505,7 +504,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
505 * it needs to be recalculated for the next packet. */ 504 * it needs to be recalculated for the next packet. */
506 tkey->rx_phase1_done = 0; 505 tkey->rx_phase1_done = 0;
507 } 506 }
508 if (net_ratelimit()) { 507 if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
509 IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" 508 IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA="
510 "%s\n", print_mac(mac, hdr->addr2)); 509 "%s\n", print_mac(mac, hdr->addr2));
511 } 510 }
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 866fc04c44f9..3fa30c40779f 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -22,7 +22,6 @@
22#include <net/ieee80211.h> 22#include <net/ieee80211.h>
23 23
24#include <linux/crypto.h> 24#include <linux/crypto.h>
25#include <linux/scatterlist.h>
26#include <linux/crc32.h> 25#include <linux/crc32.h>
27 26
28MODULE_AUTHOR("Jouni Malinen"); 27MODULE_AUTHOR("Jouni Malinen");
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 69cb6aad25be..3bca97f55d47 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -181,9 +181,8 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
181 ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ 181 ieee->ieee802_1x = 1; /* Default to supporting 802.1x */
182 182
183 INIT_LIST_HEAD(&ieee->crypt_deinit_list); 183 INIT_LIST_HEAD(&ieee->crypt_deinit_list);
184 init_timer(&ieee->crypt_deinit_timer); 184 setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler,
185 ieee->crypt_deinit_timer.data = (unsigned long)ieee; 185 (unsigned long)ieee);
186 ieee->crypt_deinit_timer.function = ieee80211_crypt_deinit_handler;
187 ieee->crypt_quiesced = 0; 186 ieee->crypt_quiesced = 0;
188 187
189 spin_lock_init(&ieee->lock); 188 spin_lock_init(&ieee->lock);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 21c0fadde03b..1e3f87c8c012 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -45,7 +45,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
45 skb_reset_mac_header(skb); 45 skb_reset_mac_header(skb);
46 skb_pull(skb, ieee80211_get_hdrlen(fc)); 46 skb_pull(skb, ieee80211_get_hdrlen(fc));
47 skb->pkt_type = PACKET_OTHERHOST; 47 skb->pkt_type = PACKET_OTHERHOST;
48 skb->protocol = __constant_htons(ETH_P_80211_RAW); 48 skb->protocol = htons(ETH_P_80211_RAW);
49 memset(skb->cb, 0, sizeof(skb->cb)); 49 memset(skb->cb, 0, sizeof(skb->cb));
50 netif_rx(skb); 50 netif_rx(skb);
51} 51}
@@ -754,7 +754,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
754 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); 754 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
755 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); 755 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
756 } else { 756 } else {
757 u16 len; 757 __be16 len;
758 /* Leave Ethernet header part of hdr and full payload */ 758 /* Leave Ethernet header part of hdr and full payload */
759 skb_pull(skb, hdrlen); 759 skb_pull(skb, hdrlen);
760 len = htons(skb->len); 760 len = htons(skb->len);
@@ -800,7 +800,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
800 if (skb2 != NULL) { 800 if (skb2 != NULL) {
801 /* send to wireless media */ 801 /* send to wireless media */
802 skb2->dev = dev; 802 skb2->dev = dev;
803 skb2->protocol = __constant_htons(ETH_P_802_3); 803 skb2->protocol = htons(ETH_P_802_3);
804 skb_reset_mac_header(skb2); 804 skb_reset_mac_header(skb2);
805 skb_reset_network_header(skb2); 805 skb_reset_network_header(skb2);
806 /* skb2->network_header += ETH_HLEN; */ 806 /* skb2->network_header += ETH_HLEN; */
@@ -1032,16 +1032,16 @@ static int ieee80211_qos_convert_ac_to_parameters(struct
1032 qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2; 1032 qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2;
1033 1033
1034 cw_min = ac_params->ecw_min_max & 0x0F; 1034 cw_min = ac_params->ecw_min_max & 0x0F;
1035 qos_param->cw_min[i] = (u16) ((1 << cw_min) - 1); 1035 qos_param->cw_min[i] = cpu_to_le16((1 << cw_min) - 1);
1036 1036
1037 cw_max = (ac_params->ecw_min_max & 0xF0) >> 4; 1037 cw_max = (ac_params->ecw_min_max & 0xF0) >> 4;
1038 qos_param->cw_max[i] = (u16) ((1 << cw_max) - 1); 1038 qos_param->cw_max[i] = cpu_to_le16((1 << cw_max) - 1);
1039 1039
1040 qos_param->flag[i] = 1040 qos_param->flag[i] =
1041 (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00; 1041 (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00;
1042 1042
1043 txop = le16_to_cpu(ac_params->tx_op_limit) * 32; 1043 txop = le16_to_cpu(ac_params->tx_op_limit) * 32;
1044 qos_param->tx_op_limit[i] = (u16) txop; 1044 qos_param->tx_op_limit[i] = cpu_to_le16(txop);
1045 } 1045 }
1046 return rc; 1046 return rc;
1047} 1047}
@@ -1585,26 +1585,25 @@ static void ieee80211_process_probe_response(struct ieee80211_device
1585 DECLARE_MAC_BUF(mac); 1585 DECLARE_MAC_BUF(mac);
1586 1586
1587 IEEE80211_DEBUG_SCAN("'%s' (%s" 1587 IEEE80211_DEBUG_SCAN("'%s' (%s"
1588 "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", 1588 "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
1589 escape_essid(info_element->data, 1589 escape_essid(info_element->data, info_element->len),
1590 info_element->len), 1590 print_mac(mac, beacon->header.addr3),
1591 print_mac(mac, beacon->header.addr3), 1591 (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0',
1592 (beacon->capability & (1 << 0xf)) ? '1' : '0', 1592 (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0',
1593 (beacon->capability & (1 << 0xe)) ? '1' : '0', 1593 (beacon->capability & cpu_to_le16(1 << 0xd)) ? '1' : '0',
1594 (beacon->capability & (1 << 0xd)) ? '1' : '0', 1594 (beacon->capability & cpu_to_le16(1 << 0xc)) ? '1' : '0',
1595 (beacon->capability & (1 << 0xc)) ? '1' : '0', 1595 (beacon->capability & cpu_to_le16(1 << 0xb)) ? '1' : '0',
1596 (beacon->capability & (1 << 0xb)) ? '1' : '0', 1596 (beacon->capability & cpu_to_le16(1 << 0xa)) ? '1' : '0',
1597 (beacon->capability & (1 << 0xa)) ? '1' : '0', 1597 (beacon->capability & cpu_to_le16(1 << 0x9)) ? '1' : '0',
1598 (beacon->capability & (1 << 0x9)) ? '1' : '0', 1598 (beacon->capability & cpu_to_le16(1 << 0x8)) ? '1' : '0',
1599 (beacon->capability & (1 << 0x8)) ? '1' : '0', 1599 (beacon->capability & cpu_to_le16(1 << 0x7)) ? '1' : '0',
1600 (beacon->capability & (1 << 0x7)) ? '1' : '0', 1600 (beacon->capability & cpu_to_le16(1 << 0x6)) ? '1' : '0',
1601 (beacon->capability & (1 << 0x6)) ? '1' : '0', 1601 (beacon->capability & cpu_to_le16(1 << 0x5)) ? '1' : '0',
1602 (beacon->capability & (1 << 0x5)) ? '1' : '0', 1602 (beacon->capability & cpu_to_le16(1 << 0x4)) ? '1' : '0',
1603 (beacon->capability & (1 << 0x4)) ? '1' : '0', 1603 (beacon->capability & cpu_to_le16(1 << 0x3)) ? '1' : '0',
1604 (beacon->capability & (1 << 0x3)) ? '1' : '0', 1604 (beacon->capability & cpu_to_le16(1 << 0x2)) ? '1' : '0',
1605 (beacon->capability & (1 << 0x2)) ? '1' : '0', 1605 (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0',
1606 (beacon->capability & (1 << 0x1)) ? '1' : '0', 1606 (beacon->capability & cpu_to_le16(1 << 0x0)) ? '1' : '0');
1607 (beacon->capability & (1 << 0x0)) ? '1' : '0');
1608 1607
1609 if (ieee80211_network_init(ieee, beacon, &network, stats)) { 1608 if (ieee80211_network_init(ieee, beacon, &network, stats)) {
1610 IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n", 1609 IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n",
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index a4c3c51140a3..d8b02603cbe5 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -126,7 +126,7 @@ payload of each frame is reduced to 492 bytes.
126static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; 126static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 };
127static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; 127static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 };
128 128
129static int ieee80211_copy_snap(u8 * data, u16 h_proto) 129static int ieee80211_copy_snap(u8 * data, __be16 h_proto)
130{ 130{
131 struct ieee80211_snap_hdr *snap; 131 struct ieee80211_snap_hdr *snap;
132 u8 *oui; 132 u8 *oui;
@@ -136,7 +136,7 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
136 snap->ssap = 0xaa; 136 snap->ssap = 0xaa;
137 snap->ctrl = 0x03; 137 snap->ctrl = 0x03;
138 138
139 if (h_proto == 0x8137 || h_proto == 0x80f3) 139 if (h_proto == htons(ETH_P_AARP) || h_proto == htons(ETH_P_IPX))
140 oui = P802_1H_OUI; 140 oui = P802_1H_OUI;
141 else 141 else
142 oui = RFC1042_OUI; 142 oui = RFC1042_OUI;
@@ -144,7 +144,7 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
144 snap->oui[1] = oui[1]; 144 snap->oui[1] = oui[1];
145 snap->oui[2] = oui[2]; 145 snap->oui[2] = oui[2];
146 146
147 *(u16 *) (data + SNAP_SIZE) = htons(h_proto); 147 memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16));
148 148
149 return SNAP_SIZE + sizeof(u16); 149 return SNAP_SIZE + sizeof(u16);
150} 150}
@@ -260,7 +260,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
260 rts_required; 260 rts_required;
261 unsigned long flags; 261 unsigned long flags;
262 struct net_device_stats *stats = &ieee->stats; 262 struct net_device_stats *stats = &ieee->stats;
263 int ether_type, encrypt, host_encrypt, host_encrypt_msdu, host_build_iv; 263 int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
264 __be16 ether_type;
264 int bytes, fc, hdr_len; 265 int bytes, fc, hdr_len;
265 struct sk_buff *skb_frag; 266 struct sk_buff *skb_frag;
266 struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */ 267 struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */
@@ -291,11 +292,11 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
291 goto success; 292 goto success;
292 } 293 }
293 294
294 ether_type = ntohs(((struct ethhdr *)skb->data)->h_proto); 295 ether_type = ((struct ethhdr *)skb->data)->h_proto;
295 296
296 crypt = ieee->crypt[ieee->tx_keyidx]; 297 crypt = ieee->crypt[ieee->tx_keyidx];
297 298
298 encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) && 299 encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) &&
299 ieee->sec.encrypt; 300 ieee->sec.encrypt;
300 301
301 host_encrypt = ieee->host_encrypt && encrypt && crypt; 302 host_encrypt = ieee->host_encrypt && encrypt && crypt;
@@ -303,7 +304,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
303 host_build_iv = ieee->host_build_iv && encrypt && crypt; 304 host_build_iv = ieee->host_build_iv && encrypt && crypt;
304 305
305 if (!encrypt && ieee->ieee802_1x && 306 if (!encrypt && ieee->ieee802_1x &&
306 ieee->drop_unencrypted && ether_type != ETH_P_PAE) { 307 ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) {
307 stats->tx_dropped++; 308 stats->tx_dropped++;
308 goto success; 309 goto success;
309 } 310 }
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index d309e8f19992..623489afa62c 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -709,7 +709,7 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
709 } else 709 } else
710 idx = ieee->tx_keyidx; 710 idx = ieee->tx_keyidx;
711 711
712 if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY && 712 if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) &&
713 ext->alg != IW_ENCODE_ALG_WEP) 713 ext->alg != IW_ENCODE_ALG_WEP)
714 if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) 714 if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA)
715 return -EINVAL; 715 return -EINVAL;
diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index a53a751d0702..1a96c2572578 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -178,11 +178,11 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
178 } 178 }
179 179
180 /* Parse the auth packet */ 180 /* Parse the auth packet */
181 switch(auth->algorithm) { 181 switch(le16_to_cpu(auth->algorithm)) {
182 case WLAN_AUTH_OPEN: 182 case WLAN_AUTH_OPEN:
183 /* Check the status code of the response */ 183 /* Check the status code of the response */
184 184
185 switch(auth->status) { 185 switch(le16_to_cpu(auth->status)) {
186 case WLAN_STATUS_SUCCESS: 186 case WLAN_STATUS_SUCCESS:
187 /* Update the status to Authenticated */ 187 /* Update the status to Authenticated */
188 spin_lock_irqsave(&mac->lock, flags); 188 spin_lock_irqsave(&mac->lock, flags);
@@ -210,7 +210,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
210 break; 210 break;
211 case WLAN_AUTH_SHARED_KEY: 211 case WLAN_AUTH_SHARED_KEY:
212 /* Figure out where we are in the process */ 212 /* Figure out where we are in the process */
213 switch(auth->transaction) { 213 switch(le16_to_cpu(auth->transaction)) {
214 case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE: 214 case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE:
215 /* Check to make sure we have a challenge IE */ 215 /* Check to make sure we have a challenge IE */
216 data = (u8 *)auth->info_element; 216 data = (u8 *)auth->info_element;
diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 26c35253be33..73b4b13fbd8f 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -148,11 +148,11 @@ ieee80211softmac_hdr_3addr(struct ieee80211softmac_device *mac,
148 * shouldn't the sequence number be in ieee80211? */ 148 * shouldn't the sequence number be in ieee80211? */
149} 149}
150 150
151static u16 151static __le16
152ieee80211softmac_capabilities(struct ieee80211softmac_device *mac, 152ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
153 struct ieee80211softmac_network *net) 153 struct ieee80211softmac_network *net)
154{ 154{
155 u16 capability = 0; 155 __le16 capability = 0;
156 156
157 /* ESS and IBSS bits are set according to the current mode */ 157 /* ESS and IBSS bits are set according to the current mode */
158 switch (mac->ieee->iw_mode) { 158 switch (mac->ieee->iw_mode) {
@@ -163,8 +163,8 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
163 capability = cpu_to_le16(WLAN_CAPABILITY_IBSS); 163 capability = cpu_to_le16(WLAN_CAPABILITY_IBSS);
164 break; 164 break;
165 case IW_MODE_AUTO: 165 case IW_MODE_AUTO:
166 capability = net->capabilities & 166 capability = cpu_to_le16(net->capabilities &
167 (WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS); 167 (WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS));
168 break; 168 break;
169 default: 169 default:
170 /* bleh. we don't ever go to these modes */ 170 /* bleh. we don't ever go to these modes */
@@ -182,7 +182,7 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
182 /* Short Preamble */ 182 /* Short Preamble */
183 /* Always supported: we probably won't ever be powering devices which 183 /* Always supported: we probably won't ever be powering devices which
184 * dont support this... */ 184 * dont support this... */
185 capability |= WLAN_CAPABILITY_SHORT_PREAMBLE; 185 capability |= cpu_to_le16(WLAN_CAPABILITY_SHORT_PREAMBLE);
186 186
187 /* PBCC */ 187 /* PBCC */
188 /* Not widely used */ 188 /* Not widely used */
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index ac36767b56e8..e01b59aedc54 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -470,7 +470,7 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev,
470{ 470{
471 struct ieee80211softmac_device *mac = ieee80211_priv(dev); 471 struct ieee80211softmac_device *mac = ieee80211_priv(dev);
472 struct iw_mlme *mlme = (struct iw_mlme *)extra; 472 struct iw_mlme *mlme = (struct iw_mlme *)extra;
473 u16 reason = cpu_to_le16(mlme->reason_code); 473 u16 reason = mlme->reason_code;
474 struct ieee80211softmac_network *net; 474 struct ieee80211softmac_network *net;
475 int err = -EINVAL; 475 int err = -EINVAL;
476 476
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9f9fd2c6f6e2..19880b086e71 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -85,6 +85,13 @@ endchoice
85config IP_FIB_HASH 85config IP_FIB_HASH
86 def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER 86 def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
87 87
88config IP_FIB_TRIE_STATS
89 bool "FIB TRIE statistics"
90 depends on IP_FIB_TRIE
91 ---help---
92 Keep track of statistics on structure of FIB TRIE table.
93 Useful for testing and measuring TRIE performance.
94
88config IP_MULTIPLE_TABLES 95config IP_MULTIPLE_TABLES
89 bool "IP: policy routing" 96 bool "IP: policy routing"
90 depends on IP_ADVANCED_ROUTER 97 depends on IP_ADVANCED_ROUTER
@@ -336,6 +343,7 @@ config INET_ESP
336 tristate "IP: ESP transformation" 343 tristate "IP: ESP transformation"
337 select XFRM 344 select XFRM
338 select CRYPTO 345 select CRYPTO
346 select CRYPTO_AEAD
339 select CRYPTO_HMAC 347 select CRYPTO_HMAC
340 select CRYPTO_MD5 348 select CRYPTO_MD5
341 select CRYPTO_CBC 349 select CRYPTO_CBC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 93fe3966805d..ad40ef3f9ebc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,9 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ 13 fib_frontend.o fib_semantics.o \
14 inet_fragment.o 14 inet_fragment.o
15 15
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
16obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 17obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
17obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o 18obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
18obj-$(CONFIG_PROC_FS) += proc.o 19obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 621b128897d7..09ca5293d08f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk);
126static struct list_head inetsw[SOCK_MAX]; 126static struct list_head inetsw[SOCK_MAX];
127static DEFINE_SPINLOCK(inetsw_lock); 127static DEFINE_SPINLOCK(inetsw_lock);
128 128
129struct ipv4_config ipv4_config;
130
131EXPORT_SYMBOL(ipv4_config);
132
129/* New destruction routine */ 133/* New destruction routine */
130 134
131void inet_sock_destruct(struct sock *sk) 135void inet_sock_destruct(struct sock *sk)
@@ -135,6 +139,8 @@ void inet_sock_destruct(struct sock *sk)
135 __skb_queue_purge(&sk->sk_receive_queue); 139 __skb_queue_purge(&sk->sk_receive_queue);
136 __skb_queue_purge(&sk->sk_error_queue); 140 __skb_queue_purge(&sk->sk_error_queue);
137 141
142 sk_mem_reclaim(sk);
143
138 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { 144 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
139 printk("Attempt to release TCP socket in state %d %p\n", 145 printk("Attempt to release TCP socket in state %d %p\n",
140 sk->sk_state, sk); 146 sk->sk_state, sk);
@@ -323,7 +329,7 @@ lookup_protocol:
323 BUG_TRAP(answer_prot->slab != NULL); 329 BUG_TRAP(answer_prot->slab != NULL);
324 330
325 err = -ENOBUFS; 331 err = -ENOBUFS;
326 sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); 332 sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
327 if (sk == NULL) 333 if (sk == NULL)
328 goto out; 334 goto out;
329 335
@@ -440,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
440 if (addr_len < sizeof(struct sockaddr_in)) 446 if (addr_len < sizeof(struct sockaddr_in))
441 goto out; 447 goto out;
442 448
443 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); 449 chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
444 450
445 /* Not specified by any standard per-se, however it breaks too 451 /* Not specified by any standard per-se, however it breaks too
446 * many applications when removed. It is unfortunate since 452 * many applications when removed. It is unfortunate since
@@ -789,12 +795,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
789 case SIOCADDRT: 795 case SIOCADDRT:
790 case SIOCDELRT: 796 case SIOCDELRT:
791 case SIOCRTMSG: 797 case SIOCRTMSG:
792 err = ip_rt_ioctl(cmd, (void __user *)arg); 798 err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg);
793 break; 799 break;
794 case SIOCDARP: 800 case SIOCDARP:
795 case SIOCGARP: 801 case SIOCGARP:
796 case SIOCSARP: 802 case SIOCSARP:
797 err = arp_ioctl(cmd, (void __user *)arg); 803 err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg);
798 break; 804 break;
799 case SIOCGIFADDR: 805 case SIOCGIFADDR:
800 case SIOCSIFADDR: 806 case SIOCSIFADDR:
@@ -838,6 +844,7 @@ const struct proto_ops inet_stream_ops = {
838 .recvmsg = sock_common_recvmsg, 844 .recvmsg = sock_common_recvmsg,
839 .mmap = sock_no_mmap, 845 .mmap = sock_no_mmap,
840 .sendpage = tcp_sendpage, 846 .sendpage = tcp_sendpage,
847 .splice_read = tcp_splice_read,
841#ifdef CONFIG_COMPAT 848#ifdef CONFIG_COMPAT
842 .compat_setsockopt = compat_sock_common_setsockopt, 849 .compat_setsockopt = compat_sock_common_setsockopt,
843 .compat_getsockopt = compat_sock_common_getsockopt, 850 .compat_getsockopt = compat_sock_common_getsockopt,
@@ -1106,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1106 }; 1113 };
1107 1114
1108 security_sk_classify_flow(sk, &fl); 1115 security_sk_classify_flow(sk, &fl);
1109 err = ip_route_output_flow(&rt, &fl, sk, 0); 1116 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
1110} 1117}
1111 if (!err) 1118 if (!err)
1112 sk_setup_caps(sk, &rt->u.dst); 1119 sk_setup_caps(sk, &rt->u.dst);
@@ -1237,7 +1244,7 @@ unsigned long snmp_fold_field(void *mib[], int offt)
1237} 1244}
1238EXPORT_SYMBOL_GPL(snmp_fold_field); 1245EXPORT_SYMBOL_GPL(snmp_fold_field);
1239 1246
1240int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) 1247int snmp_mib_init(void *ptr[2], size_t mibsize)
1241{ 1248{
1242 BUG_ON(ptr == NULL); 1249 BUG_ON(ptr == NULL);
1243 ptr[0] = __alloc_percpu(mibsize); 1250 ptr[0] = __alloc_percpu(mibsize);
@@ -1286,37 +1293,31 @@ static struct net_protocol udp_protocol = {
1286 1293
1287static struct net_protocol icmp_protocol = { 1294static struct net_protocol icmp_protocol = {
1288 .handler = icmp_rcv, 1295 .handler = icmp_rcv,
1296 .no_policy = 1,
1289}; 1297};
1290 1298
1291static int __init init_ipv4_mibs(void) 1299static int __init init_ipv4_mibs(void)
1292{ 1300{
1293 if (snmp_mib_init((void **)net_statistics, 1301 if (snmp_mib_init((void **)net_statistics,
1294 sizeof(struct linux_mib), 1302 sizeof(struct linux_mib)) < 0)
1295 __alignof__(struct linux_mib)) < 0)
1296 goto err_net_mib; 1303 goto err_net_mib;
1297 if (snmp_mib_init((void **)ip_statistics, 1304 if (snmp_mib_init((void **)ip_statistics,
1298 sizeof(struct ipstats_mib), 1305 sizeof(struct ipstats_mib)) < 0)
1299 __alignof__(struct ipstats_mib)) < 0)
1300 goto err_ip_mib; 1306 goto err_ip_mib;
1301 if (snmp_mib_init((void **)icmp_statistics, 1307 if (snmp_mib_init((void **)icmp_statistics,
1302 sizeof(struct icmp_mib), 1308 sizeof(struct icmp_mib)) < 0)
1303 __alignof__(struct icmp_mib)) < 0)
1304 goto err_icmp_mib; 1309 goto err_icmp_mib;
1305 if (snmp_mib_init((void **)icmpmsg_statistics, 1310 if (snmp_mib_init((void **)icmpmsg_statistics,
1306 sizeof(struct icmpmsg_mib), 1311 sizeof(struct icmpmsg_mib)) < 0)
1307 __alignof__(struct icmpmsg_mib)) < 0)
1308 goto err_icmpmsg_mib; 1312 goto err_icmpmsg_mib;
1309 if (snmp_mib_init((void **)tcp_statistics, 1313 if (snmp_mib_init((void **)tcp_statistics,
1310 sizeof(struct tcp_mib), 1314 sizeof(struct tcp_mib)) < 0)
1311 __alignof__(struct tcp_mib)) < 0)
1312 goto err_tcp_mib; 1315 goto err_tcp_mib;
1313 if (snmp_mib_init((void **)udp_statistics, 1316 if (snmp_mib_init((void **)udp_statistics,
1314 sizeof(struct udp_mib), 1317 sizeof(struct udp_mib)) < 0)
1315 __alignof__(struct udp_mib)) < 0)
1316 goto err_udp_mib; 1318 goto err_udp_mib;
1317 if (snmp_mib_init((void **)udplite_statistics, 1319 if (snmp_mib_init((void **)udplite_statistics,
1318 sizeof(struct udp_mib), 1320 sizeof(struct udp_mib)) < 0)
1319 __alignof__(struct udp_mib)) < 0)
1320 goto err_udplite_mib; 1321 goto err_udplite_mib;
1321 1322
1322 tcp_mib_init(); 1323 tcp_mib_init();
@@ -1418,6 +1419,9 @@ static int __init inet_init(void)
1418 /* Setup TCP slab cache for open requests. */ 1419 /* Setup TCP slab cache for open requests. */
1419 tcp_init(); 1420 tcp_init();
1420 1421
1422 /* Setup UDP memory threshold */
1423 udp_init();
1424
1421 /* Add UDP-Lite (RFC 3828) */ 1425 /* Add UDP-Lite (RFC 3828) */
1422 udplite4_register(); 1426 udplite4_register();
1423 1427
@@ -1471,15 +1475,11 @@ static int __init ipv4_proc_init(void)
1471 goto out_tcp; 1475 goto out_tcp;
1472 if (udp4_proc_init()) 1476 if (udp4_proc_init())
1473 goto out_udp; 1477 goto out_udp;
1474 if (fib_proc_init())
1475 goto out_fib;
1476 if (ip_misc_proc_init()) 1478 if (ip_misc_proc_init())
1477 goto out_misc; 1479 goto out_misc;
1478out: 1480out:
1479 return rc; 1481 return rc;
1480out_misc: 1482out_misc:
1481 fib_proc_exit();
1482out_fib:
1483 udp4_proc_exit(); 1483 udp4_proc_exit();
1484out_udp: 1484out_udp:
1485 tcp4_proc_exit(); 1485 tcp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4e8e3b079f5b..9d4555ec0b59 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -8,7 +8,6 @@
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <net/icmp.h> 9#include <net/icmp.h>
10#include <net/protocol.h> 10#include <net/protocol.h>
11#include <asm/scatterlist.h>
12 11
13 12
14/* Clear mutable options and find final destination to substitute 13/* Clear mutable options and find final destination to substitute
@@ -170,6 +169,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
170 if (ip_clear_mutable_options(iph, &dummy)) 169 if (ip_clear_mutable_options(iph, &dummy))
171 goto out; 170 goto out;
172 } 171 }
172
173 spin_lock(&x->lock);
173 { 174 {
174 u8 auth_data[MAX_AH_AUTH_LEN]; 175 u8 auth_data[MAX_AH_AUTH_LEN];
175 176
@@ -177,13 +178,16 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
177 skb_push(skb, ihl); 178 skb_push(skb, ihl);
178 err = ah_mac_digest(ahp, skb, ah->auth_data); 179 err = ah_mac_digest(ahp, skb, ah->auth_data);
179 if (err) 180 if (err)
180 goto out; 181 goto unlock;
181 err = -EINVAL; 182 if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
182 if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { 183 err = -EBADMSG;
183 x->stats.integrity_failed++;
184 goto out;
185 }
186 } 184 }
185unlock:
186 spin_unlock(&x->lock);
187
188 if (err)
189 goto out;
190
187 skb->network_header += ah_hlen; 191 skb->network_header += ah_hlen;
188 memcpy(skb_network_header(skb), work_buf, ihl); 192 memcpy(skb_network_header(skb), work_buf, ihl);
189 skb->transport_header = skb->network_header; 193 skb->transport_header = skb->network_header;
@@ -296,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x)
296} 300}
297 301
298 302
299static struct xfrm_type ah_type = 303static const struct xfrm_type ah_type =
300{ 304{
301 .description = "AH4", 305 .description = "AH4",
302 .owner = THIS_MODULE, 306 .owner = THIS_MODULE,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 36d6798947b5..8e17f65f4002 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -111,12 +111,8 @@
111#include <net/tcp.h> 111#include <net/tcp.h>
112#include <net/sock.h> 112#include <net/sock.h>
113#include <net/arp.h> 113#include <net/arp.h>
114#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
115#include <net/ax25.h> 114#include <net/ax25.h>
116#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
117#include <net/netrom.h> 115#include <net/netrom.h>
118#endif
119#endif
120#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
121#include <net/atmclip.h> 117#include <net/atmclip.h>
122struct neigh_table *clip_tbl_hook; 118struct neigh_table *clip_tbl_hook;
@@ -215,7 +211,7 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
215 ip_tr_mc_map(addr, haddr); 211 ip_tr_mc_map(addr, haddr);
216 return 0; 212 return 0;
217 case ARPHRD_INFINIBAND: 213 case ARPHRD_INFINIBAND:
218 ip_ib_mc_map(addr, haddr); 214 ip_ib_mc_map(addr, dev->broadcast, haddr);
219 return 0; 215 return 0;
220 default: 216 default:
221 if (dir) { 217 if (dir) {
@@ -239,8 +235,6 @@ static int arp_constructor(struct neighbour *neigh)
239 struct in_device *in_dev; 235 struct in_device *in_dev;
240 struct neigh_parms *parms; 236 struct neigh_parms *parms;
241 237
242 neigh->type = inet_addr_type(addr);
243
244 rcu_read_lock(); 238 rcu_read_lock();
245 in_dev = __in_dev_get_rcu(dev); 239 in_dev = __in_dev_get_rcu(dev);
246 if (in_dev == NULL) { 240 if (in_dev == NULL) {
@@ -248,6 +242,8 @@ static int arp_constructor(struct neighbour *neigh)
248 return -EINVAL; 242 return -EINVAL;
249 } 243 }
250 244
245 neigh->type = inet_addr_type(&init_net, addr);
246
251 parms = in_dev->arp_parms; 247 parms = in_dev->arp_parms;
252 __neigh_parms_put(neigh->parms); 248 __neigh_parms_put(neigh->parms);
253 neigh->parms = neigh_parms_clone(parms); 249 neigh->parms = neigh_parms_clone(parms);
@@ -345,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
345 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
346 default: 342 default:
347 case 0: /* By default announce any local IP */ 343 case 0: /* By default announce any local IP */
348 if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL) 344 if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL)
349 saddr = ip_hdr(skb)->saddr; 345 saddr = ip_hdr(skb)->saddr;
350 break; 346 break;
351 case 1: /* Restrict announcements of saddr in same subnet */ 347 case 1: /* Restrict announcements of saddr in same subnet */
352 if (!skb) 348 if (!skb)
353 break; 349 break;
354 saddr = ip_hdr(skb)->saddr; 350 saddr = ip_hdr(skb)->saddr;
355 if (inet_addr_type(saddr) == RTN_LOCAL) { 351 if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) {
356 /* saddr should be known to target */ 352 /* saddr should be known to target */
357 if (inet_addr_onlink(in_dev, target, saddr)) 353 if (inet_addr_onlink(in_dev, target, saddr))
358 break; 354 break;
@@ -386,8 +382,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
386 read_unlock_bh(&neigh->lock); 382 read_unlock_bh(&neigh->lock);
387} 383}
388 384
389static int arp_ignore(struct in_device *in_dev, struct net_device *dev, 385static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
390 __be32 sip, __be32 tip)
391{ 386{
392 int scope; 387 int scope;
393 388
@@ -407,7 +402,6 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
407 case 3: /* Do not reply for scope host addresses */ 402 case 3: /* Do not reply for scope host addresses */
408 sip = 0; 403 sip = 0;
409 scope = RT_SCOPE_LINK; 404 scope = RT_SCOPE_LINK;
410 dev = NULL;
411 break; 405 break;
412 case 4: /* Reserved */ 406 case 4: /* Reserved */
413 case 5: 407 case 5:
@@ -419,7 +413,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
419 default: 413 default:
420 return 0; 414 return 0;
421 } 415 }
422 return !inet_confirm_addr(dev, sip, tip, scope); 416 return !inet_confirm_addr(in_dev, sip, tip, scope);
423} 417}
424 418
425static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) 419static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
@@ -430,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
430 int flag = 0; 424 int flag = 0;
431 /*unsigned long now; */ 425 /*unsigned long now; */
432 426
433 if (ip_route_output_key(&rt, &fl) < 0) 427 if (ip_route_output_key(&init_net, &rt, &fl) < 0)
434 return 1; 428 return 1;
435 if (rt->u.dst.dev != dev) { 429 if (rt->u.dst.dev != dev) {
436 NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); 430 NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -483,7 +477,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 477
484 paddr = ((struct rtable*)skb->dst)->rt_gateway; 478 paddr = ((struct rtable*)skb->dst)->rt_gateway;
485 479
486 if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) 480 if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev))
487 return 0; 481 return 0;
488 482
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 483 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -564,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
564 */ 558 */
565struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, 559struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
566 struct net_device *dev, __be32 src_ip, 560 struct net_device *dev, __be32 src_ip,
567 unsigned char *dest_hw, unsigned char *src_hw, 561 const unsigned char *dest_hw,
568 unsigned char *target_hw) 562 const unsigned char *src_hw,
563 const unsigned char *target_hw)
569{ 564{
570 struct sk_buff *skb; 565 struct sk_buff *skb;
571 struct arphdr *arp; 566 struct arphdr *arp;
@@ -678,8 +673,8 @@ void arp_xmit(struct sk_buff *skb)
678 */ 673 */
679void arp_send(int type, int ptype, __be32 dest_ip, 674void arp_send(int type, int ptype, __be32 dest_ip,
680 struct net_device *dev, __be32 src_ip, 675 struct net_device *dev, __be32 src_ip,
681 unsigned char *dest_hw, unsigned char *src_hw, 676 const unsigned char *dest_hw, const unsigned char *src_hw,
682 unsigned char *target_hw) 677 const unsigned char *target_hw)
683{ 678{
684 struct sk_buff *skb; 679 struct sk_buff *skb;
685 680
@@ -710,7 +705,7 @@ static int arp_process(struct sk_buff *skb)
710 struct arphdr *arp; 705 struct arphdr *arp;
711 unsigned char *arp_ptr; 706 unsigned char *arp_ptr;
712 struct rtable *rt; 707 struct rtable *rt;
713 unsigned char *sha, *tha; 708 unsigned char *sha;
714 __be32 sip, tip; 709 __be32 sip, tip;
715 u16 dev_type = dev->type; 710 u16 dev_type = dev->type;
716 int addr_type; 711 int addr_type;
@@ -731,20 +726,10 @@ static int arp_process(struct sk_buff *skb)
731 htons(dev_type) != arp->ar_hrd) 726 htons(dev_type) != arp->ar_hrd)
732 goto out; 727 goto out;
733 break; 728 break;
734#ifdef CONFIG_NET_ETHERNET
735 case ARPHRD_ETHER: 729 case ARPHRD_ETHER:
736#endif
737#ifdef CONFIG_TR
738 case ARPHRD_IEEE802_TR: 730 case ARPHRD_IEEE802_TR:
739#endif
740#ifdef CONFIG_FDDI
741 case ARPHRD_FDDI: 731 case ARPHRD_FDDI:
742#endif
743#ifdef CONFIG_NET_FC
744 case ARPHRD_IEEE802: 732 case ARPHRD_IEEE802:
745#endif
746#if defined(CONFIG_NET_ETHERNET) || defined(CONFIG_TR) || \
747 defined(CONFIG_FDDI) || defined(CONFIG_NET_FC)
748 /* 733 /*
749 * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802 734 * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
750 * devices, according to RFC 2625) devices will accept ARP 735 * devices, according to RFC 2625) devices will accept ARP
@@ -759,21 +744,16 @@ static int arp_process(struct sk_buff *skb)
759 arp->ar_pro != htons(ETH_P_IP)) 744 arp->ar_pro != htons(ETH_P_IP))
760 goto out; 745 goto out;
761 break; 746 break;
762#endif
763#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
764 case ARPHRD_AX25: 747 case ARPHRD_AX25:
765 if (arp->ar_pro != htons(AX25_P_IP) || 748 if (arp->ar_pro != htons(AX25_P_IP) ||
766 arp->ar_hrd != htons(ARPHRD_AX25)) 749 arp->ar_hrd != htons(ARPHRD_AX25))
767 goto out; 750 goto out;
768 break; 751 break;
769#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
770 case ARPHRD_NETROM: 752 case ARPHRD_NETROM:
771 if (arp->ar_pro != htons(AX25_P_IP) || 753 if (arp->ar_pro != htons(AX25_P_IP) ||
772 arp->ar_hrd != htons(ARPHRD_NETROM)) 754 arp->ar_hrd != htons(ARPHRD_NETROM))
773 goto out; 755 goto out;
774 break; 756 break;
775#endif
776#endif
777 } 757 }
778 758
779 /* Understand only these message types */ 759 /* Understand only these message types */
@@ -790,14 +770,13 @@ static int arp_process(struct sk_buff *skb)
790 arp_ptr += dev->addr_len; 770 arp_ptr += dev->addr_len;
791 memcpy(&sip, arp_ptr, 4); 771 memcpy(&sip, arp_ptr, 4);
792 arp_ptr += 4; 772 arp_ptr += 4;
793 tha = arp_ptr;
794 arp_ptr += dev->addr_len; 773 arp_ptr += dev->addr_len;
795 memcpy(&tip, arp_ptr, 4); 774 memcpy(&tip, arp_ptr, 4);
796/* 775/*
797 * Check for bad requests for 127.x.x.x and requests for multicast 776 * Check for bad requests for 127.x.x.x and requests for multicast
798 * addresses. If this is one such, delete it. 777 * addresses. If this is one such, delete it.
799 */ 778 */
800 if (LOOPBACK(tip) || MULTICAST(tip)) 779 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
801 goto out; 780 goto out;
802 781
803/* 782/*
@@ -826,9 +805,10 @@ static int arp_process(struct sk_buff *skb)
826 /* Special case: IPv4 duplicate address detection packet (RFC2131) */ 805 /* Special case: IPv4 duplicate address detection packet (RFC2131) */
827 if (sip == 0) { 806 if (sip == 0) {
828 if (arp->ar_op == htons(ARPOP_REQUEST) && 807 if (arp->ar_op == htons(ARPOP_REQUEST) &&
829 inet_addr_type(tip) == RTN_LOCAL && 808 inet_addr_type(&init_net, tip) == RTN_LOCAL &&
830 !arp_ignore(in_dev,dev,sip,tip)) 809 !arp_ignore(in_dev, sip, tip))
831 arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr); 810 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
811 dev->dev_addr, sha);
832 goto out; 812 goto out;
833 } 813 }
834 814
@@ -844,7 +824,7 @@ static int arp_process(struct sk_buff *skb)
844 int dont_send = 0; 824 int dont_send = 0;
845 825
846 if (!dont_send) 826 if (!dont_send)
847 dont_send |= arp_ignore(in_dev,dev,sip,tip); 827 dont_send |= arp_ignore(in_dev,sip,tip);
848 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 828 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
849 dont_send |= arp_filter(sip,tip,dev); 829 dont_send |= arp_filter(sip,tip,dev);
850 if (!dont_send) 830 if (!dont_send)
@@ -854,9 +834,8 @@ static int arp_process(struct sk_buff *skb)
854 } 834 }
855 goto out; 835 goto out;
856 } else if (IN_DEV_FORWARD(in_dev)) { 836 } else if (IN_DEV_FORWARD(in_dev)) {
857 if ((rt->rt_flags&RTCF_DNAT) || 837 if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
858 (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && 838 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
859 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
860 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 839 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
861 if (n) 840 if (n)
862 neigh_release(n); 841 neigh_release(n);
@@ -879,14 +858,14 @@ static int arp_process(struct sk_buff *skb)
879 858
880 n = __neigh_lookup(&arp_tbl, &sip, dev, 0); 859 n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
881 860
882 if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { 861 if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) {
883 /* Unsolicited ARP is not accepted by default. 862 /* Unsolicited ARP is not accepted by default.
884 It is possible, that this option should be enabled for some 863 It is possible, that this option should be enabled for some
885 devices (strip is candidate) 864 devices (strip is candidate)
886 */ 865 */
887 if (n == NULL && 866 if (n == NULL &&
888 arp->ar_op == htons(ARPOP_REPLY) && 867 arp->ar_op == htons(ARPOP_REPLY) &&
889 inet_addr_type(sip) == RTN_UNICAST) 868 inet_addr_type(&init_net, sip) == RTN_UNICAST)
890 n = __neigh_lookup(&arp_tbl, &sip, dev, 1); 869 n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
891 } 870 }
892 871
@@ -971,44 +950,60 @@ out_of_mem:
971 * Set (create) an ARP cache entry. 950 * Set (create) an ARP cache entry.
972 */ 951 */
973 952
974static int arp_req_set(struct arpreq *r, struct net_device * dev) 953static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
975{ 954{
976 __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; 955 if (dev == NULL) {
956 IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
957 return 0;
958 }
959 if (__in_dev_get_rtnl(dev)) {
960 IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
961 return 0;
962 }
963 return -ENXIO;
964}
965
966static int arp_req_set_public(struct net *net, struct arpreq *r,
967 struct net_device *dev)
968{
969 __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
970 __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
971
972 if (mask && mask != htonl(0xFFFFFFFF))
973 return -EINVAL;
974 if (!dev && (r->arp_flags & ATF_COM)) {
975 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
976 r->arp_ha.sa_data);
977 if (!dev)
978 return -ENODEV;
979 }
980 if (mask) {
981 if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL)
982 return -ENOBUFS;
983 return 0;
984 }
985
986 return arp_req_set_proxy(net, dev, 1);
987}
988
989static int arp_req_set(struct net *net, struct arpreq *r,
990 struct net_device * dev)
991{
992 __be32 ip;
977 struct neighbour *neigh; 993 struct neighbour *neigh;
978 int err; 994 int err;
979 995
980 if (r->arp_flags&ATF_PUBL) { 996 if (r->arp_flags & ATF_PUBL)
981 __be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; 997 return arp_req_set_public(net, r, dev);
982 if (mask && mask != htonl(0xFFFFFFFF))
983 return -EINVAL;
984 if (!dev && (r->arp_flags & ATF_COM)) {
985 dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family, r->arp_ha.sa_data);
986 if (!dev)
987 return -ENODEV;
988 }
989 if (mask) {
990 if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
991 return -ENOBUFS;
992 return 0;
993 }
994 if (dev == NULL) {
995 IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
996 return 0;
997 }
998 if (__in_dev_get_rtnl(dev)) {
999 IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
1000 return 0;
1001 }
1002 return -ENXIO;
1003 }
1004 998
999 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1005 if (r->arp_flags & ATF_PERM) 1000 if (r->arp_flags & ATF_PERM)
1006 r->arp_flags |= ATF_COM; 1001 r->arp_flags |= ATF_COM;
1007 if (dev == NULL) { 1002 if (dev == NULL) {
1008 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1003 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
1009 .tos = RTO_ONLINK } } }; 1004 .tos = RTO_ONLINK } } };
1010 struct rtable * rt; 1005 struct rtable * rt;
1011 if ((err = ip_route_output_key(&rt, &fl)) != 0) 1006 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1012 return err; 1007 return err;
1013 dev = rt->u.dst.dev; 1008 dev = rt->u.dst.dev;
1014 ip_rt_put(rt); 1009 ip_rt_put(rt);
@@ -1085,37 +1080,37 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
1085 return err; 1080 return err;
1086} 1081}
1087 1082
1088static int arp_req_delete(struct arpreq *r, struct net_device * dev) 1083static int arp_req_delete_public(struct net *net, struct arpreq *r,
1084 struct net_device *dev)
1085{
1086 __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
1087 __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
1088
1089 if (mask == htonl(0xFFFFFFFF))
1090 return pneigh_delete(&arp_tbl, net, &ip, dev);
1091
1092 if (mask)
1093 return -EINVAL;
1094
1095 return arp_req_set_proxy(net, dev, 0);
1096}
1097
1098static int arp_req_delete(struct net *net, struct arpreq *r,
1099 struct net_device * dev)
1089{ 1100{
1090 int err; 1101 int err;
1091 __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1102 __be32 ip;
1092 struct neighbour *neigh; 1103 struct neighbour *neigh;
1093 1104
1094 if (r->arp_flags & ATF_PUBL) { 1105 if (r->arp_flags & ATF_PUBL)
1095 __be32 mask = 1106 return arp_req_delete_public(net, r, dev);
1096 ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
1097 if (mask == htonl(0xFFFFFFFF))
1098 return pneigh_delete(&arp_tbl, &ip, dev);
1099 if (mask == 0) {
1100 if (dev == NULL) {
1101 IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
1102 return 0;
1103 }
1104 if (__in_dev_get_rtnl(dev)) {
1105 IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
1106 PROXY_ARP, 0);
1107 return 0;
1108 }
1109 return -ENXIO;
1110 }
1111 return -EINVAL;
1112 }
1113 1107
1108 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1114 if (dev == NULL) { 1109 if (dev == NULL) {
1115 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1110 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
1116 .tos = RTO_ONLINK } } }; 1111 .tos = RTO_ONLINK } } };
1117 struct rtable * rt; 1112 struct rtable * rt;
1118 if ((err = ip_route_output_key(&rt, &fl)) != 0) 1113 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1119 return err; 1114 return err;
1120 dev = rt->u.dst.dev; 1115 dev = rt->u.dst.dev;
1121 ip_rt_put(rt); 1116 ip_rt_put(rt);
@@ -1138,7 +1133,7 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
1138 * Handle an ARP layer I/O control request. 1133 * Handle an ARP layer I/O control request.
1139 */ 1134 */
1140 1135
1141int arp_ioctl(unsigned int cmd, void __user *arg) 1136int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1142{ 1137{
1143 int err; 1138 int err;
1144 struct arpreq r; 1139 struct arpreq r;
@@ -1170,7 +1165,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
1170 rtnl_lock(); 1165 rtnl_lock();
1171 if (r.arp_dev[0]) { 1166 if (r.arp_dev[0]) {
1172 err = -ENODEV; 1167 err = -ENODEV;
1173 if ((dev = __dev_get_by_name(&init_net, r.arp_dev)) == NULL) 1168 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
1174 goto out; 1169 goto out;
1175 1170
1176 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1171 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1186,10 +1181,10 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
1186 1181
1187 switch (cmd) { 1182 switch (cmd) {
1188 case SIOCDARP: 1183 case SIOCDARP:
1189 err = arp_req_delete(&r, dev); 1184 err = arp_req_delete(net, &r, dev);
1190 break; 1185 break;
1191 case SIOCSARP: 1186 case SIOCSARP:
1192 err = arp_req_set(&r, dev); 1187 err = arp_req_set(net, &r, dev);
1193 break; 1188 break;
1194 case SIOCGARP: 1189 case SIOCGARP:
1195 err = arp_req_get(&r, dev); 1190 err = arp_req_get(&r, dev);
@@ -1378,8 +1373,8 @@ static const struct seq_operations arp_seq_ops = {
1378 1373
1379static int arp_seq_open(struct inode *inode, struct file *file) 1374static int arp_seq_open(struct inode *inode, struct file *file)
1380{ 1375{
1381 return seq_open_private(file, &arp_seq_ops, 1376 return seq_open_net(inode, file, &arp_seq_ops,
1382 sizeof(struct neigh_seq_state)); 1377 sizeof(struct neigh_seq_state));
1383} 1378}
1384 1379
1385static const struct file_operations arp_seq_fops = { 1380static const struct file_operations arp_seq_fops = {
@@ -1387,7 +1382,7 @@ static const struct file_operations arp_seq_fops = {
1387 .open = arp_seq_open, 1382 .open = arp_seq_open,
1388 .read = seq_read, 1383 .read = seq_read,
1389 .llseek = seq_lseek, 1384 .llseek = seq_lseek,
1390 .release = seq_release_private, 1385 .release = seq_release_net,
1391}; 1386};
1392 1387
1393static int __init arp_proc_init(void) 1388static int __init arp_proc_init(void)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index f18e88bc86ec..a2241060113b 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -63,7 +63,7 @@ struct cipso_v4_domhsh_entry {
63 * probably be turned into a hash table or something similar so we 63 * probably be turned into a hash table or something similar so we
64 * can do quick lookups. */ 64 * can do quick lookups. */
65static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); 65static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
66static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); 66static LIST_HEAD(cipso_v4_doi_list);
67 67
68/* Label mapping cache */ 68/* Label mapping cache */
69int cipso_v4_cache_enabled = 1; 69int cipso_v4_cache_enabled = 1;
@@ -348,6 +348,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
348 atomic_inc(&entry->lsm_data->refcount); 348 atomic_inc(&entry->lsm_data->refcount);
349 secattr->cache = entry->lsm_data; 349 secattr->cache = entry->lsm_data;
350 secattr->flags |= NETLBL_SECATTR_CACHE; 350 secattr->flags |= NETLBL_SECATTR_CACHE;
351 secattr->type = NETLBL_NLTYPE_CIPSOV4;
351 if (prev_entry == NULL) { 352 if (prev_entry == NULL) {
352 spin_unlock_bh(&cipso_v4_cache[bkt].lock); 353 spin_unlock_bh(&cipso_v4_cache[bkt].lock);
353 return 0; 354 return 0;
@@ -865,7 +866,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
865 } 866 }
866 867
867 for (;;) { 868 for (;;) {
868 host_spot = netlbl_secattr_catmap_walk(secattr->mls_cat, 869 host_spot = netlbl_secattr_catmap_walk(secattr->attr.mls.cat,
869 host_spot + 1); 870 host_spot + 1);
870 if (host_spot < 0) 871 if (host_spot < 0)
871 break; 872 break;
@@ -948,7 +949,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
948 return -EPERM; 949 return -EPERM;
949 break; 950 break;
950 } 951 }
951 ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, 952 ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat,
952 host_spot, 953 host_spot,
953 GFP_ATOMIC); 954 GFP_ATOMIC);
954 if (ret_val != 0) 955 if (ret_val != 0)
@@ -1014,7 +1015,8 @@ static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def,
1014 u32 cat_iter = 0; 1015 u32 cat_iter = 0;
1015 1016
1016 for (;;) { 1017 for (;;) {
1017 cat = netlbl_secattr_catmap_walk(secattr->mls_cat, cat + 1); 1018 cat = netlbl_secattr_catmap_walk(secattr->attr.mls.cat,
1019 cat + 1);
1018 if (cat < 0) 1020 if (cat < 0)
1019 break; 1021 break;
1020 if ((cat_iter + 2) > net_cat_len) 1022 if ((cat_iter + 2) > net_cat_len)
@@ -1049,7 +1051,7 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def,
1049 u32 iter; 1051 u32 iter;
1050 1052
1051 for (iter = 0; iter < net_cat_len; iter += 2) { 1053 for (iter = 0; iter < net_cat_len; iter += 2) {
1052 ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, 1054 ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat,
1053 ntohs(get_unaligned((__be16 *)&net_cat[iter])), 1055 ntohs(get_unaligned((__be16 *)&net_cat[iter])),
1054 GFP_ATOMIC); 1056 GFP_ATOMIC);
1055 if (ret_val != 0) 1057 if (ret_val != 0)
@@ -1130,7 +1132,8 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
1130 return -ENOSPC; 1132 return -ENOSPC;
1131 1133
1132 for (;;) { 1134 for (;;) {
1133 iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1); 1135 iter = netlbl_secattr_catmap_walk(secattr->attr.mls.cat,
1136 iter + 1);
1134 if (iter < 0) 1137 if (iter < 0)
1135 break; 1138 break;
1136 cat_size += (iter == 0 ? 0 : sizeof(u16)); 1139 cat_size += (iter == 0 ? 0 : sizeof(u16));
@@ -1138,7 +1141,8 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
1138 return -ENOSPC; 1141 return -ENOSPC;
1139 array[array_cnt++] = iter; 1142 array[array_cnt++] = iter;
1140 1143
1141 iter = netlbl_secattr_catmap_walk_rng(secattr->mls_cat, iter); 1144 iter = netlbl_secattr_catmap_walk_rng(secattr->attr.mls.cat,
1145 iter);
1142 if (iter < 0) 1146 if (iter < 0)
1143 return -EFAULT; 1147 return -EFAULT;
1144 cat_size += sizeof(u16); 1148 cat_size += sizeof(u16);
@@ -1191,7 +1195,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
1191 else 1195 else
1192 cat_low = 0; 1196 cat_low = 0;
1193 1197
1194 ret_val = netlbl_secattr_catmap_setrng(secattr->mls_cat, 1198 ret_val = netlbl_secattr_catmap_setrng(secattr->attr.mls.cat,
1195 cat_low, 1199 cat_low,
1196 cat_high, 1200 cat_high,
1197 GFP_ATOMIC); 1201 GFP_ATOMIC);
@@ -1251,7 +1255,9 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
1251 if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) 1255 if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0)
1252 return -EPERM; 1256 return -EPERM;
1253 1257
1254 ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); 1258 ret_val = cipso_v4_map_lvl_hton(doi_def,
1259 secattr->attr.mls.lvl,
1260 &level);
1255 if (ret_val != 0) 1261 if (ret_val != 0)
1256 return ret_val; 1262 return ret_val;
1257 1263
@@ -1303,12 +1309,13 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
1303 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); 1309 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
1304 if (ret_val != 0) 1310 if (ret_val != 0)
1305 return ret_val; 1311 return ret_val;
1306 secattr->mls_lvl = level; 1312 secattr->attr.mls.lvl = level;
1307 secattr->flags |= NETLBL_SECATTR_MLS_LVL; 1313 secattr->flags |= NETLBL_SECATTR_MLS_LVL;
1308 1314
1309 if (tag_len > 4) { 1315 if (tag_len > 4) {
1310 secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); 1316 secattr->attr.mls.cat =
1311 if (secattr->mls_cat == NULL) 1317 netlbl_secattr_catmap_alloc(GFP_ATOMIC);
1318 if (secattr->attr.mls.cat == NULL)
1312 return -ENOMEM; 1319 return -ENOMEM;
1313 1320
1314 ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, 1321 ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
@@ -1316,7 +1323,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
1316 tag_len - 4, 1323 tag_len - 4,
1317 secattr); 1324 secattr);
1318 if (ret_val != 0) { 1325 if (ret_val != 0) {
1319 netlbl_secattr_catmap_free(secattr->mls_cat); 1326 netlbl_secattr_catmap_free(secattr->attr.mls.cat);
1320 return ret_val; 1327 return ret_val;
1321 } 1328 }
1322 1329
@@ -1350,7 +1357,9 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
1350 if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) 1357 if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL))
1351 return -EPERM; 1358 return -EPERM;
1352 1359
1353 ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); 1360 ret_val = cipso_v4_map_lvl_hton(doi_def,
1361 secattr->attr.mls.lvl,
1362 &level);
1354 if (ret_val != 0) 1363 if (ret_val != 0)
1355 return ret_val; 1364 return ret_val;
1356 1365
@@ -1396,12 +1405,13 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def,
1396 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); 1405 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
1397 if (ret_val != 0) 1406 if (ret_val != 0)
1398 return ret_val; 1407 return ret_val;
1399 secattr->mls_lvl = level; 1408 secattr->attr.mls.lvl = level;
1400 secattr->flags |= NETLBL_SECATTR_MLS_LVL; 1409 secattr->flags |= NETLBL_SECATTR_MLS_LVL;
1401 1410
1402 if (tag_len > 4) { 1411 if (tag_len > 4) {
1403 secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); 1412 secattr->attr.mls.cat =
1404 if (secattr->mls_cat == NULL) 1413 netlbl_secattr_catmap_alloc(GFP_ATOMIC);
1414 if (secattr->attr.mls.cat == NULL)
1405 return -ENOMEM; 1415 return -ENOMEM;
1406 1416
1407 ret_val = cipso_v4_map_cat_enum_ntoh(doi_def, 1417 ret_val = cipso_v4_map_cat_enum_ntoh(doi_def,
@@ -1409,7 +1419,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def,
1409 tag_len - 4, 1419 tag_len - 4,
1410 secattr); 1420 secattr);
1411 if (ret_val != 0) { 1421 if (ret_val != 0) {
1412 netlbl_secattr_catmap_free(secattr->mls_cat); 1422 netlbl_secattr_catmap_free(secattr->attr.mls.cat);
1413 return ret_val; 1423 return ret_val;
1414 } 1424 }
1415 1425
@@ -1443,7 +1453,9 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
1443 if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) 1453 if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL))
1444 return -EPERM; 1454 return -EPERM;
1445 1455
1446 ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); 1456 ret_val = cipso_v4_map_lvl_hton(doi_def,
1457 secattr->attr.mls.lvl,
1458 &level);
1447 if (ret_val != 0) 1459 if (ret_val != 0)
1448 return ret_val; 1460 return ret_val;
1449 1461
@@ -1488,12 +1500,13 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
1488 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); 1500 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
1489 if (ret_val != 0) 1501 if (ret_val != 0)
1490 return ret_val; 1502 return ret_val;
1491 secattr->mls_lvl = level; 1503 secattr->attr.mls.lvl = level;
1492 secattr->flags |= NETLBL_SECATTR_MLS_LVL; 1504 secattr->flags |= NETLBL_SECATTR_MLS_LVL;
1493 1505
1494 if (tag_len > 4) { 1506 if (tag_len > 4) {
1495 secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); 1507 secattr->attr.mls.cat =
1496 if (secattr->mls_cat == NULL) 1508 netlbl_secattr_catmap_alloc(GFP_ATOMIC);
1509 if (secattr->attr.mls.cat == NULL)
1497 return -ENOMEM; 1510 return -ENOMEM;
1498 1511
1499 ret_val = cipso_v4_map_cat_rng_ntoh(doi_def, 1512 ret_val = cipso_v4_map_cat_rng_ntoh(doi_def,
@@ -1501,7 +1514,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
1501 tag_len - 4, 1514 tag_len - 4,
1502 secattr); 1515 secattr);
1503 if (ret_val != 0) { 1516 if (ret_val != 0) {
1504 netlbl_secattr_catmap_free(secattr->mls_cat); 1517 netlbl_secattr_catmap_free(secattr->attr.mls.cat);
1505 return ret_val; 1518 return ret_val;
1506 } 1519 }
1507 1520
@@ -1850,6 +1863,8 @@ static int cipso_v4_getattr(const unsigned char *cipso,
1850 ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); 1863 ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
1851 break; 1864 break;
1852 } 1865 }
1866 if (ret_val == 0)
1867 secattr->type = NETLBL_NLTYPE_CIPSOV4;
1853 1868
1854getattr_return: 1869getattr_return:
1855 rcu_read_unlock(); 1870 rcu_read_unlock();
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0301dd468cf4..0c0c73f368ce 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -40,7 +40,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
40 40
41 oif = sk->sk_bound_dev_if; 41 oif = sk->sk_bound_dev_if;
42 saddr = inet->saddr; 42 saddr = inet->saddr;
43 if (MULTICAST(usin->sin_addr.s_addr)) { 43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
44 if (!oif) 44 if (!oif)
45 oif = inet->mc_index; 45 oif = inet->mc_index;
46 if (!saddr) 46 if (!saddr)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 55d199e4ae21..f282b26f63eb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -62,8 +62,9 @@
62#include <net/route.h> 62#include <net/route.h>
63#include <net/ip_fib.h> 63#include <net/ip_fib.h>
64#include <net/rtnetlink.h> 64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
65 66
66struct ipv4_devconf ipv4_devconf = { 67static struct ipv4_devconf ipv4_devconf = {
67 .data = { 68 .data = {
68 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, 69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, 70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
@@ -82,7 +83,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
82 }, 83 },
83}; 84};
84 85
85#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) 86#define IPV4_DEVCONF_DFLT(net, attr) \
87 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
86 88
87static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 89static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 }, 90 [IFA_LOCAL] = { .type = NLA_U32 },
@@ -98,9 +100,15 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 100static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 int destroy); 101 int destroy);
100#ifdef CONFIG_SYSCTL 102#ifdef CONFIG_SYSCTL
101static void devinet_sysctl_register(struct in_device *in_dev, 103static void devinet_sysctl_register(struct in_device *idev);
102 struct ipv4_devconf *p); 104static void devinet_sysctl_unregister(struct in_device *idev);
103static void devinet_sysctl_unregister(struct ipv4_devconf *p); 105#else
106static inline void devinet_sysctl_register(struct in_device *idev)
107{
108}
109static inline void devinet_sysctl_unregister(struct in_device *idev)
110{
111}
104#endif 112#endif
105 113
106/* Locks all the inet devices. */ 114/* Locks all the inet devices. */
@@ -157,24 +165,18 @@ static struct in_device *inetdev_init(struct net_device *dev)
157 if (!in_dev) 165 if (!in_dev)
158 goto out; 166 goto out;
159 INIT_RCU_HEAD(&in_dev->rcu_head); 167 INIT_RCU_HEAD(&in_dev->rcu_head);
160 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); 168 memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169 sizeof(in_dev->cnf));
161 in_dev->cnf.sysctl = NULL; 170 in_dev->cnf.sysctl = NULL;
162 in_dev->dev = dev; 171 in_dev->dev = dev;
163 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) 172 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164 goto out_kfree; 173 goto out_kfree;
165 /* Reference in_dev->dev */ 174 /* Reference in_dev->dev */
166 dev_hold(dev); 175 dev_hold(dev);
167#ifdef CONFIG_SYSCTL
168 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
170#endif
171
172 /* Account for reference dev->ip_ptr (below) */ 176 /* Account for reference dev->ip_ptr (below) */
173 in_dev_hold(in_dev); 177 in_dev_hold(in_dev);
174 178
175#ifdef CONFIG_SYSCTL 179 devinet_sysctl_register(in_dev);
176 devinet_sysctl_register(in_dev, &in_dev->cnf);
177#endif
178 ip_mc_init_dev(in_dev); 180 ip_mc_init_dev(in_dev);
179 if (dev->flags & IFF_UP) 181 if (dev->flags & IFF_UP)
180 ip_mc_up(in_dev); 182 ip_mc_up(in_dev);
@@ -213,15 +215,9 @@ static void inetdev_destroy(struct in_device *in_dev)
213 inet_free_ifa(ifa); 215 inet_free_ifa(ifa);
214 } 216 }
215 217
216#ifdef CONFIG_SYSCTL
217 devinet_sysctl_unregister(&in_dev->cnf);
218#endif
219
220 dev->ip_ptr = NULL; 218 dev->ip_ptr = NULL;
221 219
222#ifdef CONFIG_SYSCTL 220 devinet_sysctl_unregister(in_dev);
223 neigh_sysctl_unregister(in_dev->arp_parms);
224#endif
225 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 221 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
226 arp_ifdown(dev); 222 arp_ifdown(dev);
227 223
@@ -408,17 +404,17 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
408 in_dev_hold(in_dev); 404 in_dev_hold(in_dev);
409 ifa->ifa_dev = in_dev; 405 ifa->ifa_dev = in_dev;
410 } 406 }
411 if (LOOPBACK(ifa->ifa_local)) 407 if (ipv4_is_loopback(ifa->ifa_local))
412 ifa->ifa_scope = RT_SCOPE_HOST; 408 ifa->ifa_scope = RT_SCOPE_HOST;
413 return inet_insert_ifa(ifa); 409 return inet_insert_ifa(ifa);
414} 410}
415 411
416struct in_device *inetdev_by_index(int ifindex) 412struct in_device *inetdev_by_index(struct net *net, int ifindex)
417{ 413{
418 struct net_device *dev; 414 struct net_device *dev;
419 struct in_device *in_dev = NULL; 415 struct in_device *in_dev = NULL;
420 read_lock(&dev_base_lock); 416 read_lock(&dev_base_lock);
421 dev = __dev_get_by_index(&init_net, ifindex); 417 dev = __dev_get_by_index(net, ifindex);
422 if (dev) 418 if (dev)
423 in_dev = in_dev_get(dev); 419 in_dev = in_dev_get(dev);
424 read_unlock(&dev_base_lock); 420 read_unlock(&dev_base_lock);
@@ -441,6 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
441 437
442static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 438static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
443{ 439{
440 struct net *net = skb->sk->sk_net;
444 struct nlattr *tb[IFA_MAX+1]; 441 struct nlattr *tb[IFA_MAX+1];
445 struct in_device *in_dev; 442 struct in_device *in_dev;
446 struct ifaddrmsg *ifm; 443 struct ifaddrmsg *ifm;
@@ -449,12 +446,15 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
449 446
450 ASSERT_RTNL(); 447 ASSERT_RTNL();
451 448
449 if (net != &init_net)
450 return -EINVAL;
451
452 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 452 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 if (err < 0) 453 if (err < 0)
454 goto errout; 454 goto errout;
455 455
456 ifm = nlmsg_data(nlh); 456 ifm = nlmsg_data(nlh);
457 in_dev = inetdev_by_index(ifm->ifa_index); 457 in_dev = inetdev_by_index(net, ifm->ifa_index);
458 if (in_dev == NULL) { 458 if (in_dev == NULL) {
459 err = -ENODEV; 459 err = -ENODEV;
460 goto errout; 460 goto errout;
@@ -485,49 +485,43 @@ errout:
485 return err; 485 return err;
486} 486}
487 487
488static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) 488static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489{ 489{
490 struct nlattr *tb[IFA_MAX+1]; 490 struct nlattr *tb[IFA_MAX+1];
491 struct in_ifaddr *ifa; 491 struct in_ifaddr *ifa;
492 struct ifaddrmsg *ifm; 492 struct ifaddrmsg *ifm;
493 struct net_device *dev; 493 struct net_device *dev;
494 struct in_device *in_dev; 494 struct in_device *in_dev;
495 int err = -EINVAL; 495 int err;
496 496
497 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 497 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 if (err < 0) 498 if (err < 0)
499 goto errout; 499 goto errout;
500 500
501 ifm = nlmsg_data(nlh); 501 ifm = nlmsg_data(nlh);
502 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) { 502 err = -EINVAL;
503 err = -EINVAL; 503 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504 goto errout; 504 goto errout;
505 }
506 505
507 dev = __dev_get_by_index(&init_net, ifm->ifa_index); 506 dev = __dev_get_by_index(net, ifm->ifa_index);
508 if (dev == NULL) { 507 err = -ENODEV;
509 err = -ENODEV; 508 if (dev == NULL)
510 goto errout; 509 goto errout;
511 }
512 510
513 in_dev = __in_dev_get_rtnl(dev); 511 in_dev = __in_dev_get_rtnl(dev);
514 if (in_dev == NULL) { 512 err = -ENOBUFS;
515 err = -ENOBUFS; 513 if (in_dev == NULL)
516 goto errout; 514 goto errout;
517 }
518
519 ipv4_devconf_setall(in_dev);
520 515
521 ifa = inet_alloc_ifa(); 516 ifa = inet_alloc_ifa();
522 if (ifa == NULL) { 517 if (ifa == NULL)
523 /* 518 /*
524 * A potential indev allocation can be left alive, it stays 519 * A potential indev allocation can be left alive, it stays
525 * assigned to its device and is destroy with it. 520 * assigned to its device and is destroy with it.
526 */ 521 */
527 err = -ENOBUFS;
528 goto errout; 522 goto errout;
529 }
530 523
524 ipv4_devconf_setall(in_dev);
531 in_dev_hold(in_dev); 525 in_dev_hold(in_dev);
532 526
533 if (tb[IFA_ADDRESS] == NULL) 527 if (tb[IFA_ADDRESS] == NULL)
@@ -561,11 +555,15 @@ errout:
561 555
562static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 556static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
563{ 557{
558 struct net *net = skb->sk->sk_net;
564 struct in_ifaddr *ifa; 559 struct in_ifaddr *ifa;
565 560
566 ASSERT_RTNL(); 561 ASSERT_RTNL();
567 562
568 ifa = rtm_to_ifaddr(nlh); 563 if (net != &init_net)
564 return -EINVAL;
565
566 ifa = rtm_to_ifaddr(net, nlh);
569 if (IS_ERR(ifa)) 567 if (IS_ERR(ifa))
570 return PTR_ERR(ifa); 568 return PTR_ERR(ifa);
571 569
@@ -580,7 +578,7 @@ static __inline__ int inet_abc_len(__be32 addr)
580{ 578{
581 int rc = -1; /* Something else, probably a multicast. */ 579 int rc = -1; /* Something else, probably a multicast. */
582 580
583 if (ZERONET(addr)) 581 if (ipv4_is_zeronet(addr))
584 rc = 0; 582 rc = 0;
585 else { 583 else {
586 __u32 haddr = ntohl(addr); 584 __u32 haddr = ntohl(addr);
@@ -965,28 +963,25 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
965 963
966/* 964/*
967 * Confirm that local IP address exists using wildcards: 965 * Confirm that local IP address exists using wildcards:
968 * - dev: only on this interface, 0=any interface 966 * - in_dev: only on this interface, 0=any interface
969 * - dst: only in the same subnet as dst, 0=any dst 967 * - dst: only in the same subnet as dst, 0=any dst
970 * - local: address, 0=autoselect the local address 968 * - local: address, 0=autoselect the local address
971 * - scope: maximum allowed scope value for the local address 969 * - scope: maximum allowed scope value for the local address
972 */ 970 */
973__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) 971__be32 inet_confirm_addr(struct in_device *in_dev,
972 __be32 dst, __be32 local, int scope)
974{ 973{
975 __be32 addr = 0; 974 __be32 addr = 0;
976 struct in_device *in_dev; 975 struct net_device *dev;
977 976 struct net *net;
978 if (dev) {
979 rcu_read_lock();
980 if ((in_dev = __in_dev_get_rcu(dev)))
981 addr = confirm_addr_indev(in_dev, dst, local, scope);
982 rcu_read_unlock();
983 977
984 return addr; 978 if (scope != RT_SCOPE_LINK)
985 } 979 return confirm_addr_indev(in_dev, dst, local, scope);
986 980
981 net = in_dev->dev->nd_net;
987 read_lock(&dev_base_lock); 982 read_lock(&dev_base_lock);
988 rcu_read_lock(); 983 rcu_read_lock();
989 for_each_netdev(&init_net, dev) { 984 for_each_netdev(net, dev) {
990 if ((in_dev = __in_dev_get_rcu(dev))) { 985 if ((in_dev = __in_dev_get_rcu(dev))) {
991 addr = confirm_addr_indev(in_dev, dst, local, scope); 986 addr = confirm_addr_indev(in_dev, dst, local, scope);
992 if (addr) 987 if (addr)
@@ -1028,7 +1023,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1028 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1023 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029 if (named++ == 0) 1024 if (named++ == 0)
1030 continue; 1025 continue;
1031 dot = strchr(ifa->ifa_label, ':'); 1026 dot = strchr(old, ':');
1032 if (dot == NULL) { 1027 if (dot == NULL) {
1033 sprintf(old, ":%d", named); 1028 sprintf(old, ":%d", named);
1034 dot = old; 1029 dot = old;
@@ -1107,13 +1102,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1107 */ 1102 */
1108 inetdev_changename(dev, in_dev); 1103 inetdev_changename(dev, in_dev);
1109 1104
1110#ifdef CONFIG_SYSCTL 1105 devinet_sysctl_unregister(in_dev);
1111 devinet_sysctl_unregister(&in_dev->cnf); 1106 devinet_sysctl_register(in_dev);
1112 neigh_sysctl_unregister(in_dev->arp_parms);
1113 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1114 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1115 devinet_sysctl_register(in_dev, &in_dev->cnf);
1116#endif
1117 break; 1107 break;
1118 } 1108 }
1119out: 1109out:
@@ -1175,15 +1165,19 @@ nla_put_failure:
1175 1165
1176static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1166static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177{ 1167{
1168 struct net *net = skb->sk->sk_net;
1178 int idx, ip_idx; 1169 int idx, ip_idx;
1179 struct net_device *dev; 1170 struct net_device *dev;
1180 struct in_device *in_dev; 1171 struct in_device *in_dev;
1181 struct in_ifaddr *ifa; 1172 struct in_ifaddr *ifa;
1182 int s_ip_idx, s_idx = cb->args[0]; 1173 int s_ip_idx, s_idx = cb->args[0];
1183 1174
1175 if (net != &init_net)
1176 return 0;
1177
1184 s_ip_idx = ip_idx = cb->args[1]; 1178 s_ip_idx = ip_idx = cb->args[1];
1185 idx = 0; 1179 idx = 0;
1186 for_each_netdev(&init_net, dev) { 1180 for_each_netdev(net, dev) {
1187 if (idx < s_idx) 1181 if (idx < s_idx)
1188 goto cont; 1182 goto cont;
1189 if (idx > s_idx) 1183 if (idx > s_idx)
@@ -1217,7 +1211,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217 struct sk_buff *skb; 1211 struct sk_buff *skb;
1218 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1212 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219 int err = -ENOBUFS; 1213 int err = -ENOBUFS;
1214 struct net *net;
1220 1215
1216 net = ifa->ifa_dev->dev->nd_net;
1221 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1217 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222 if (skb == NULL) 1218 if (skb == NULL)
1223 goto errout; 1219 goto errout;
@@ -1229,30 +1225,52 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1229 kfree_skb(skb); 1225 kfree_skb(skb);
1230 goto errout; 1226 goto errout;
1231 } 1227 }
1232 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1228 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233errout: 1229errout:
1234 if (err < 0) 1230 if (err < 0)
1235 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); 1231 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1236} 1232}
1237 1233
1238#ifdef CONFIG_SYSCTL 1234#ifdef CONFIG_SYSCTL
1239 1235
1240static void devinet_copy_dflt_conf(int i) 1236static void devinet_copy_dflt_conf(struct net *net, int i)
1241{ 1237{
1242 struct net_device *dev; 1238 struct net_device *dev;
1243 1239
1244 read_lock(&dev_base_lock); 1240 read_lock(&dev_base_lock);
1245 for_each_netdev(&init_net, dev) { 1241 for_each_netdev(net, dev) {
1246 struct in_device *in_dev; 1242 struct in_device *in_dev;
1247 rcu_read_lock(); 1243 rcu_read_lock();
1248 in_dev = __in_dev_get_rcu(dev); 1244 in_dev = __in_dev_get_rcu(dev);
1249 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1245 if (in_dev && !test_bit(i, in_dev->cnf.state))
1250 in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; 1246 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1251 rcu_read_unlock(); 1247 rcu_read_unlock();
1252 } 1248 }
1253 read_unlock(&dev_base_lock); 1249 read_unlock(&dev_base_lock);
1254} 1250}
1255 1251
1252static void inet_forward_change(struct net *net)
1253{
1254 struct net_device *dev;
1255 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1256
1257 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1258 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1259
1260 read_lock(&dev_base_lock);
1261 for_each_netdev(net, dev) {
1262 struct in_device *in_dev;
1263 rcu_read_lock();
1264 in_dev = __in_dev_get_rcu(dev);
1265 if (in_dev)
1266 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1267 rcu_read_unlock();
1268 }
1269 read_unlock(&dev_base_lock);
1270
1271 rt_cache_flush(0);
1272}
1273
1256static int devinet_conf_proc(ctl_table *ctl, int write, 1274static int devinet_conf_proc(ctl_table *ctl, int write,
1257 struct file* filp, void __user *buffer, 1275 struct file* filp, void __user *buffer,
1258 size_t *lenp, loff_t *ppos) 1276 size_t *lenp, loff_t *ppos)
@@ -1261,12 +1279,13 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1261 1279
1262 if (write) { 1280 if (write) {
1263 struct ipv4_devconf *cnf = ctl->extra1; 1281 struct ipv4_devconf *cnf = ctl->extra1;
1282 struct net *net = ctl->extra2;
1264 int i = (int *)ctl->data - cnf->data; 1283 int i = (int *)ctl->data - cnf->data;
1265 1284
1266 set_bit(i, cnf->state); 1285 set_bit(i, cnf->state);
1267 1286
1268 if (cnf == &ipv4_devconf_dflt) 1287 if (cnf == net->ipv4.devconf_dflt)
1269 devinet_copy_dflt_conf(i); 1288 devinet_copy_dflt_conf(net, i);
1270 } 1289 }
1271 1290
1272 return ret; 1291 return ret;
@@ -1277,6 +1296,7 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1277 void __user *newval, size_t newlen) 1296 void __user *newval, size_t newlen)
1278{ 1297{
1279 struct ipv4_devconf *cnf; 1298 struct ipv4_devconf *cnf;
1299 struct net *net;
1280 int *valp = table->data; 1300 int *valp = table->data;
1281 int new; 1301 int new;
1282 int i; 1302 int i;
@@ -1312,38 +1332,17 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1312 *valp = new; 1332 *valp = new;
1313 1333
1314 cnf = table->extra1; 1334 cnf = table->extra1;
1335 net = table->extra2;
1315 i = (int *)table->data - cnf->data; 1336 i = (int *)table->data - cnf->data;
1316 1337
1317 set_bit(i, cnf->state); 1338 set_bit(i, cnf->state);
1318 1339
1319 if (cnf == &ipv4_devconf_dflt) 1340 if (cnf == net->ipv4.devconf_dflt)
1320 devinet_copy_dflt_conf(i); 1341 devinet_copy_dflt_conf(net, i);
1321 1342
1322 return 1; 1343 return 1;
1323} 1344}
1324 1345
1325void inet_forward_change(void)
1326{
1327 struct net_device *dev;
1328 int on = IPV4_DEVCONF_ALL(FORWARDING);
1329
1330 IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1331 IPV4_DEVCONF_DFLT(FORWARDING) = on;
1332
1333 read_lock(&dev_base_lock);
1334 for_each_netdev(&init_net, dev) {
1335 struct in_device *in_dev;
1336 rcu_read_lock();
1337 in_dev = __in_dev_get_rcu(dev);
1338 if (in_dev)
1339 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1340 rcu_read_unlock();
1341 }
1342 read_unlock(&dev_base_lock);
1343
1344 rt_cache_flush(0);
1345}
1346
1347static int devinet_sysctl_forward(ctl_table *ctl, int write, 1346static int devinet_sysctl_forward(ctl_table *ctl, int write,
1348 struct file* filp, void __user *buffer, 1347 struct file* filp, void __user *buffer,
1349 size_t *lenp, loff_t *ppos) 1348 size_t *lenp, loff_t *ppos)
@@ -1353,9 +1352,11 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1353 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1352 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 1353
1355 if (write && *valp != val) { 1354 if (write && *valp != val) {
1356 if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) 1355 struct net *net = ctl->extra2;
1357 inet_forward_change(); 1356
1358 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) 1357 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1358 inet_forward_change(net);
1359 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1359 rt_cache_flush(0); 1360 rt_cache_flush(0);
1360 } 1361 }
1361 1362
@@ -1420,11 +1421,8 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1420 1421
1421static struct devinet_sysctl_table { 1422static struct devinet_sysctl_table {
1422 struct ctl_table_header *sysctl_header; 1423 struct ctl_table_header *sysctl_header;
1423 ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; 1424 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1424 ctl_table devinet_dev[2]; 1425 char *dev_name;
1425 ctl_table devinet_conf_dir[2];
1426 ctl_table devinet_proto_dir[2];
1427 ctl_table devinet_root_dir[2];
1428} devinet_sysctl = { 1426} devinet_sysctl = {
1429 .devinet_vars = { 1427 .devinet_vars = {
1430 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 1428 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1456,62 +1454,32 @@ static struct devinet_sysctl_table {
1456 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1454 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1457 "promote_secondaries"), 1455 "promote_secondaries"),
1458 }, 1456 },
1459 .devinet_dev = {
1460 {
1461 .ctl_name = NET_PROTO_CONF_ALL,
1462 .procname = "all",
1463 .mode = 0555,
1464 .child = devinet_sysctl.devinet_vars,
1465 },
1466 },
1467 .devinet_conf_dir = {
1468 {
1469 .ctl_name = NET_IPV4_CONF,
1470 .procname = "conf",
1471 .mode = 0555,
1472 .child = devinet_sysctl.devinet_dev,
1473 },
1474 },
1475 .devinet_proto_dir = {
1476 {
1477 .ctl_name = NET_IPV4,
1478 .procname = "ipv4",
1479 .mode = 0555,
1480 .child = devinet_sysctl.devinet_conf_dir,
1481 },
1482 },
1483 .devinet_root_dir = {
1484 {
1485 .ctl_name = CTL_NET,
1486 .procname = "net",
1487 .mode = 0555,
1488 .child = devinet_sysctl.devinet_proto_dir,
1489 },
1490 },
1491}; 1457};
1492 1458
1493static void devinet_sysctl_register(struct in_device *in_dev, 1459static int __devinet_sysctl_register(struct net *net, char *dev_name,
1494 struct ipv4_devconf *p) 1460 int ctl_name, struct ipv4_devconf *p)
1495{ 1461{
1496 int i; 1462 int i;
1497 struct net_device *dev = in_dev ? in_dev->dev : NULL; 1463 struct devinet_sysctl_table *t;
1498 struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t), 1464
1499 GFP_KERNEL); 1465#define DEVINET_CTL_PATH_DEV 3
1500 char *dev_name = NULL;
1501 1466
1467 struct ctl_path devinet_ctl_path[] = {
1468 { .procname = "net", .ctl_name = CTL_NET, },
1469 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1470 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471 { /* to be set */ },
1472 { },
1473 };
1474
1475 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1502 if (!t) 1476 if (!t)
1503 return; 1477 goto out;
1478
1504 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 1479 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1505 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 1480 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1506 t->devinet_vars[i].extra1 = p; 1481 t->devinet_vars[i].extra1 = p;
1507 } 1482 t->devinet_vars[i].extra2 = net;
1508
1509 if (dev) {
1510 dev_name = dev->name;
1511 t->devinet_dev[0].ctl_name = dev->ifindex;
1512 } else {
1513 dev_name = "default";
1514 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1515 } 1483 }
1516 1484
1517 /* 1485 /*
@@ -1519,56 +1487,183 @@ static void devinet_sysctl_register(struct in_device *in_dev,
1519 * by sysctl and we wouldn't want anyone to change it under our feet 1487 * by sysctl and we wouldn't want anyone to change it under our feet
1520 * (see SIOCSIFNAME). 1488 * (see SIOCSIFNAME).
1521 */ 1489 */
1522 dev_name = kstrdup(dev_name, GFP_KERNEL); 1490 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1523 if (!dev_name) 1491 if (!t->dev_name)
1524 goto free; 1492 goto free;
1525 1493
1526 t->devinet_dev[0].procname = dev_name; 1494 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1527 t->devinet_dev[0].child = t->devinet_vars; 1495 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1528 t->devinet_conf_dir[0].child = t->devinet_dev;
1529 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1530 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1531 1496
1532 t->sysctl_header = register_sysctl_table(t->devinet_root_dir); 1497 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498 t->devinet_vars);
1533 if (!t->sysctl_header) 1499 if (!t->sysctl_header)
1534 goto free_procname; 1500 goto free_procname;
1535 1501
1536 p->sysctl = t; 1502 p->sysctl = t;
1537 return; 1503 return 0;
1504
1505free_procname:
1506 kfree(t->dev_name);
1507free:
1508 kfree(t);
1509out:
1510 return -ENOBUFS;
1511}
1512
1513static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514{
1515 struct devinet_sysctl_table *t = cnf->sysctl;
1516
1517 if (t == NULL)
1518 return;
1538 1519
1539 /* error path */ 1520 cnf->sysctl = NULL;
1540 free_procname: 1521 unregister_sysctl_table(t->sysctl_header);
1541 kfree(dev_name); 1522 kfree(t->dev_name);
1542 free:
1543 kfree(t); 1523 kfree(t);
1544 return;
1545} 1524}
1546 1525
1547static void devinet_sysctl_unregister(struct ipv4_devconf *p) 1526static void devinet_sysctl_register(struct in_device *idev)
1548{ 1527{
1549 if (p->sysctl) { 1528 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1550 struct devinet_sysctl_table *t = p->sysctl; 1529 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1551 p->sysctl = NULL; 1530 __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1552 unregister_sysctl_table(t->sysctl_header); 1531 idev->dev->ifindex, &idev->cnf);
1553 kfree(t->devinet_dev[0].procname); 1532}
1554 kfree(t); 1533
1534static void devinet_sysctl_unregister(struct in_device *idev)
1535{
1536 __devinet_sysctl_unregister(&idev->cnf);
1537 neigh_sysctl_unregister(idev->arp_parms);
1538}
1539
1540static struct ctl_table ctl_forward_entry[] = {
1541 {
1542 .ctl_name = NET_IPV4_FORWARD,
1543 .procname = "ip_forward",
1544 .data = &ipv4_devconf.data[
1545 NET_IPV4_CONF_FORWARDING - 1],
1546 .maxlen = sizeof(int),
1547 .mode = 0644,
1548 .proc_handler = devinet_sysctl_forward,
1549 .strategy = devinet_conf_sysctl,
1550 .extra1 = &ipv4_devconf,
1551 .extra2 = &init_net,
1552 },
1553 { },
1554};
1555
1556static __net_initdata struct ctl_path net_ipv4_path[] = {
1557 { .procname = "net", .ctl_name = CTL_NET, },
1558 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1559 { },
1560};
1561#endif
1562
1563static __net_init int devinet_init_net(struct net *net)
1564{
1565 int err;
1566 struct ipv4_devconf *all, *dflt;
1567#ifdef CONFIG_SYSCTL
1568 struct ctl_table *tbl = ctl_forward_entry;
1569 struct ctl_table_header *forw_hdr;
1570#endif
1571
1572 err = -ENOMEM;
1573 all = &ipv4_devconf;
1574 dflt = &ipv4_devconf_dflt;
1575
1576 if (net != &init_net) {
1577 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578 if (all == NULL)
1579 goto err_alloc_all;
1580
1581 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582 if (dflt == NULL)
1583 goto err_alloc_dflt;
1584
1585#ifdef CONFIG_SYSCTL
1586 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587 if (tbl == NULL)
1588 goto err_alloc_ctl;
1589
1590 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591 tbl[0].extra1 = all;
1592 tbl[0].extra2 = net;
1593#endif
1555 } 1594 }
1595
1596#ifdef CONFIG_SYSCTL
1597 err = __devinet_sysctl_register(net, "all",
1598 NET_PROTO_CONF_ALL, all);
1599 if (err < 0)
1600 goto err_reg_all;
1601
1602 err = __devinet_sysctl_register(net, "default",
1603 NET_PROTO_CONF_DEFAULT, dflt);
1604 if (err < 0)
1605 goto err_reg_dflt;
1606
1607 err = -ENOMEM;
1608 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609 if (forw_hdr == NULL)
1610 goto err_reg_ctl;
1611 net->ipv4.forw_hdr = forw_hdr;
1612#endif
1613
1614 net->ipv4.devconf_all = all;
1615 net->ipv4.devconf_dflt = dflt;
1616 return 0;
1617
1618#ifdef CONFIG_SYSCTL
1619err_reg_ctl:
1620 __devinet_sysctl_unregister(dflt);
1621err_reg_dflt:
1622 __devinet_sysctl_unregister(all);
1623err_reg_all:
1624 if (tbl != ctl_forward_entry)
1625 kfree(tbl);
1626err_alloc_ctl:
1627#endif
1628 if (dflt != &ipv4_devconf_dflt)
1629 kfree(dflt);
1630err_alloc_dflt:
1631 if (all != &ipv4_devconf)
1632 kfree(all);
1633err_alloc_all:
1634 return err;
1556} 1635}
1636
1637static __net_exit void devinet_exit_net(struct net *net)
1638{
1639#ifdef CONFIG_SYSCTL
1640 struct ctl_table *tbl;
1641
1642 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1646 kfree(tbl);
1557#endif 1647#endif
1648 kfree(net->ipv4.devconf_dflt);
1649 kfree(net->ipv4.devconf_all);
1650}
1651
1652static __net_initdata struct pernet_operations devinet_ops = {
1653 .init = devinet_init_net,
1654 .exit = devinet_exit_net,
1655};
1558 1656
1559void __init devinet_init(void) 1657void __init devinet_init(void)
1560{ 1658{
1659 register_pernet_subsys(&devinet_ops);
1660
1561 register_gifconf(PF_INET, inet_gifconf); 1661 register_gifconf(PF_INET, inet_gifconf);
1562 register_netdevice_notifier(&ip_netdev_notifier); 1662 register_netdevice_notifier(&ip_netdev_notifier);
1563 1663
1564 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); 1664 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1565 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); 1665 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1566 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1666 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1567#ifdef CONFIG_SYSCTL
1568 devinet_sysctl.sysctl_header =
1569 register_sysctl_table(devinet_sysctl.devinet_root_dir);
1570 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1571#endif
1572} 1667}
1573 1668
1574EXPORT_SYMBOL(in_dev_finish_destroy); 1669EXPORT_SYMBOL(in_dev_finish_destroy);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index cad4278025ad..258d17631b4b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,26 +1,118 @@
1#include <crypto/aead.h>
2#include <crypto/authenc.h>
1#include <linux/err.h> 3#include <linux/err.h>
2#include <linux/module.h> 4#include <linux/module.h>
3#include <net/ip.h> 5#include <net/ip.h>
4#include <net/xfrm.h> 6#include <net/xfrm.h>
5#include <net/esp.h> 7#include <net/esp.h>
6#include <linux/scatterlist.h> 8#include <linux/scatterlist.h>
7#include <linux/crypto.h>
8#include <linux/kernel.h> 9#include <linux/kernel.h>
9#include <linux/pfkeyv2.h> 10#include <linux/pfkeyv2.h>
10#include <linux/random.h> 11#include <linux/rtnetlink.h>
12#include <linux/slab.h>
11#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/in6.h>
12#include <net/icmp.h> 15#include <net/icmp.h>
13#include <net/protocol.h> 16#include <net/protocol.h>
14#include <net/udp.h> 17#include <net/udp.h>
15 18
19struct esp_skb_cb {
20 struct xfrm_skb_cb xfrm;
21 void *tmp;
22};
23
24#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
25
26/*
27 * Allocate an AEAD request structure with extra space for SG and IV.
28 *
29 * For alignment considerations the IV is placed at the front, followed
30 * by the request and finally the SG list.
31 *
32 * TODO: Use spare space in skb for this where possible.
33 */
34static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
35{
36 unsigned int len;
37
38 len = crypto_aead_ivsize(aead);
39 if (len) {
40 len += crypto_aead_alignmask(aead) &
41 ~(crypto_tfm_ctx_alignment() - 1);
42 len = ALIGN(len, crypto_tfm_ctx_alignment());
43 }
44
45 len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
46 len = ALIGN(len, __alignof__(struct scatterlist));
47
48 len += sizeof(struct scatterlist) * nfrags;
49
50 return kmalloc(len, GFP_ATOMIC);
51}
52
53static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
54{
55 return crypto_aead_ivsize(aead) ?
56 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
57}
58
59static inline struct aead_givcrypt_request *esp_tmp_givreq(
60 struct crypto_aead *aead, u8 *iv)
61{
62 struct aead_givcrypt_request *req;
63
64 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
65 crypto_tfm_ctx_alignment());
66 aead_givcrypt_set_tfm(req, aead);
67 return req;
68}
69
70static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
71{
72 struct aead_request *req;
73
74 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
75 crypto_tfm_ctx_alignment());
76 aead_request_set_tfm(req, aead);
77 return req;
78}
79
80static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
81 struct aead_request *req)
82{
83 return (void *)ALIGN((unsigned long)(req + 1) +
84 crypto_aead_reqsize(aead),
85 __alignof__(struct scatterlist));
86}
87
88static inline struct scatterlist *esp_givreq_sg(
89 struct crypto_aead *aead, struct aead_givcrypt_request *req)
90{
91 return (void *)ALIGN((unsigned long)(req + 1) +
92 crypto_aead_reqsize(aead),
93 __alignof__(struct scatterlist));
94}
95
96static void esp_output_done(struct crypto_async_request *base, int err)
97{
98 struct sk_buff *skb = base->data;
99
100 kfree(ESP_SKB_CB(skb)->tmp);
101 xfrm_output_resume(skb, err);
102}
103
16static int esp_output(struct xfrm_state *x, struct sk_buff *skb) 104static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
17{ 105{
18 int err; 106 int err;
19 struct ip_esp_hdr *esph; 107 struct ip_esp_hdr *esph;
20 struct crypto_blkcipher *tfm; 108 struct crypto_aead *aead;
21 struct blkcipher_desc desc; 109 struct aead_givcrypt_request *req;
110 struct scatterlist *sg;
111 struct scatterlist *asg;
22 struct esp_data *esp; 112 struct esp_data *esp;
23 struct sk_buff *trailer; 113 struct sk_buff *trailer;
114 void *tmp;
115 u8 *iv;
24 u8 *tail; 116 u8 *tail;
25 int blksize; 117 int blksize;
26 int clen; 118 int clen;
@@ -35,17 +127,26 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
35 clen = skb->len; 127 clen = skb->len;
36 128
37 esp = x->data; 129 esp = x->data;
38 alen = esp->auth.icv_trunc_len; 130 aead = esp->aead;
39 tfm = esp->conf.tfm; 131 alen = crypto_aead_authsize(aead);
40 desc.tfm = tfm; 132
41 desc.flags = 0; 133 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
42 blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
43 clen = ALIGN(clen + 2, blksize); 134 clen = ALIGN(clen + 2, blksize);
44 if (esp->conf.padlen) 135 if (esp->padlen)
45 clen = ALIGN(clen, esp->conf.padlen); 136 clen = ALIGN(clen, esp->padlen);
46 137
47 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) 138 if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
48 goto error; 139 goto error;
140 nfrags = err;
141
142 tmp = esp_alloc_tmp(aead, nfrags + 1);
143 if (!tmp)
144 goto error;
145
146 iv = esp_tmp_iv(aead, tmp);
147 req = esp_tmp_givreq(aead, iv);
148 asg = esp_givreq_sg(aead, req);
149 sg = asg + 1;
49 150
50 /* Fill padding... */ 151 /* Fill padding... */
51 tail = skb_tail_pointer(trailer); 152 tail = skb_tail_pointer(trailer);
@@ -55,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
55 tail[i] = i + 1; 156 tail[i] = i + 1;
56 } while (0); 157 } while (0);
57 tail[clen - skb->len - 2] = (clen - skb->len) - 2; 158 tail[clen - skb->len - 2] = (clen - skb->len) - 2;
58 pskb_put(skb, trailer, clen - skb->len); 159 tail[clen - skb->len - 1] = *skb_mac_header(skb);
160 pskb_put(skb, trailer, clen - skb->len + alen);
59 161
60 skb_push(skb, -skb_network_offset(skb)); 162 skb_push(skb, -skb_network_offset(skb));
61 esph = ip_esp_hdr(skb); 163 esph = ip_esp_hdr(skb);
62 *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb);
63 *skb_mac_header(skb) = IPPROTO_ESP; 164 *skb_mac_header(skb) = IPPROTO_ESP;
64 165
65 spin_lock_bh(&x->lock);
66
67 /* this is non-NULL only with UDP Encapsulation */ 166 /* this is non-NULL only with UDP Encapsulation */
68 if (x->encap) { 167 if (x->encap) {
69 struct xfrm_encap_tmpl *encap = x->encap; 168 struct xfrm_encap_tmpl *encap = x->encap;
70 struct udphdr *uh; 169 struct udphdr *uh;
71 __be32 *udpdata32; 170 __be32 *udpdata32;
171 unsigned int sport, dport;
172 int encap_type;
173
174 spin_lock_bh(&x->lock);
175 sport = encap->encap_sport;
176 dport = encap->encap_dport;
177 encap_type = encap->encap_type;
178 spin_unlock_bh(&x->lock);
72 179
73 uh = (struct udphdr *)esph; 180 uh = (struct udphdr *)esph;
74 uh->source = encap->encap_sport; 181 uh->source = sport;
75 uh->dest = encap->encap_dport; 182 uh->dest = dport;
76 uh->len = htons(skb->len + alen - skb_transport_offset(skb)); 183 uh->len = htons(skb->len - skb_transport_offset(skb));
77 uh->check = 0; 184 uh->check = 0;
78 185
79 switch (encap->encap_type) { 186 switch (encap_type) {
80 default: 187 default:
81 case UDP_ENCAP_ESPINUDP: 188 case UDP_ENCAP_ESPINUDP:
82 esph = (struct ip_esp_hdr *)(uh + 1); 189 esph = (struct ip_esp_hdr *)(uh + 1);
@@ -94,130 +201,55 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
94 esph->spi = x->id.spi; 201 esph->spi = x->id.spi;
95 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); 202 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
96 203
97 if (esp->conf.ivlen) { 204 sg_init_table(sg, nfrags);
98 if (unlikely(!esp->conf.ivinitted)) { 205 skb_to_sgvec(skb, sg,
99 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 206 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
100 esp->conf.ivinitted = 1; 207 clen + alen);
101 } 208 sg_init_one(asg, esph, sizeof(*esph));
102 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); 209
103 } 210 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
104 211 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
105 do { 212 aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
106 struct scatterlist *sg = &esp->sgbuf[0]; 213 aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
107 214
108 if (unlikely(nfrags > ESP_NUM_FAST_SG)) { 215 ESP_SKB_CB(skb)->tmp = tmp;
109 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); 216 err = crypto_aead_givencrypt(req);
110 if (!sg) 217 if (err == -EINPROGRESS)
111 goto unlock; 218 goto error;
112 }
113 sg_init_table(sg, nfrags);
114 sg_mark_end(sg, skb_to_sgvec(skb, sg, esph->enc_data +
115 esp->conf.ivlen -
116 skb->data, clen));
117 err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
118 if (unlikely(sg != &esp->sgbuf[0]))
119 kfree(sg);
120 } while (0);
121
122 if (unlikely(err))
123 goto unlock;
124
125 if (esp->conf.ivlen) {
126 memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
127 crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
128 }
129 219
130 if (esp->auth.icv_full_len) { 220 if (err == -EBUSY)
131 err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, 221 err = NET_XMIT_DROP;
132 sizeof(*esph) + esp->conf.ivlen + clen);
133 memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
134 }
135 222
136unlock: 223 kfree(tmp);
137 spin_unlock_bh(&x->lock);
138 224
139error: 225error:
140 return err; 226 return err;
141} 227}
142 228
143/* 229static int esp_input_done2(struct sk_buff *skb, int err)
144 * Note: detecting truncated vs. non-truncated authentication data is very
145 * expensive, so we only support truncated data, which is the recommended
146 * and common case.
147 */
148static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
149{ 230{
150 struct iphdr *iph; 231 struct iphdr *iph;
151 struct ip_esp_hdr *esph; 232 struct xfrm_state *x = xfrm_input_state(skb);
152 struct esp_data *esp = x->data; 233 struct esp_data *esp = x->data;
153 struct crypto_blkcipher *tfm = esp->conf.tfm; 234 struct crypto_aead *aead = esp->aead;
154 struct blkcipher_desc desc = { .tfm = tfm }; 235 int alen = crypto_aead_authsize(aead);
155 struct sk_buff *trailer; 236 int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
156 int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); 237 int elen = skb->len - hlen;
157 int alen = esp->auth.icv_trunc_len;
158 int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen;
159 int nfrags;
160 int ihl; 238 int ihl;
161 u8 nexthdr[2]; 239 u8 nexthdr[2];
162 struct scatterlist *sg;
163 int padlen; 240 int padlen;
164 int err;
165 241
166 if (!pskb_may_pull(skb, sizeof(*esph))) 242 kfree(ESP_SKB_CB(skb)->tmp);
167 goto out;
168
169 if (elen <= 0 || (elen & (blksize-1)))
170 goto out;
171 243
172 /* If integrity check is required, do this. */
173 if (esp->auth.icv_full_len) {
174 u8 sum[alen];
175
176 err = esp_mac_digest(esp, skb, 0, skb->len - alen);
177 if (err)
178 goto out;
179
180 if (skb_copy_bits(skb, skb->len - alen, sum, alen))
181 BUG();
182
183 if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
184 x->stats.integrity_failed++;
185 goto out;
186 }
187 }
188
189 if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
190 goto out;
191
192 skb->ip_summed = CHECKSUM_NONE;
193
194 esph = (struct ip_esp_hdr *)skb->data;
195
196 /* Get ivec. This can be wrong, check against another impls. */
197 if (esp->conf.ivlen)
198 crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
199
200 sg = &esp->sgbuf[0];
201
202 if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
203 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
204 if (!sg)
205 goto out;
206 }
207 sg_init_table(sg, nfrags);
208 sg_mark_end(sg, skb_to_sgvec(skb, sg, sizeof(*esph) + esp->conf.ivlen,
209 elen));
210 err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
211 if (unlikely(sg != &esp->sgbuf[0]))
212 kfree(sg);
213 if (unlikely(err)) 244 if (unlikely(err))
214 return err; 245 goto out;
215 246
216 if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) 247 if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
217 BUG(); 248 BUG();
218 249
250 err = -EINVAL;
219 padlen = nexthdr[0]; 251 padlen = nexthdr[0];
220 if (padlen+2 >= elen) 252 if (padlen + 2 + alen >= elen)
221 goto out; 253 goto out;
222 254
223 /* ... check padding bits here. Silly. :-) */ 255 /* ... check padding bits here. Silly. :-) */
@@ -263,23 +295,100 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
263 } 295 }
264 296
265 pskb_trim(skb, skb->len - alen - padlen - 2); 297 pskb_trim(skb, skb->len - alen - padlen - 2);
266 __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); 298 __skb_pull(skb, hlen);
267 skb_set_transport_header(skb, -ihl); 299 skb_set_transport_header(skb, -ihl);
268 300
269 return nexthdr[1]; 301 err = nexthdr[1];
302
303 /* RFC4303: Drop dummy packets without any error */
304 if (err == IPPROTO_NONE)
305 err = -EINVAL;
306
307out:
308 return err;
309}
310
311static void esp_input_done(struct crypto_async_request *base, int err)
312{
313 struct sk_buff *skb = base->data;
314
315 xfrm_input_resume(skb, esp_input_done2(skb, err));
316}
317
318/*
319 * Note: detecting truncated vs. non-truncated authentication data is very
320 * expensive, so we only support truncated data, which is the recommended
321 * and common case.
322 */
323static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
324{
325 struct ip_esp_hdr *esph;
326 struct esp_data *esp = x->data;
327 struct crypto_aead *aead = esp->aead;
328 struct aead_request *req;
329 struct sk_buff *trailer;
330 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
331 int nfrags;
332 void *tmp;
333 u8 *iv;
334 struct scatterlist *sg;
335 struct scatterlist *asg;
336 int err = -EINVAL;
337
338 if (!pskb_may_pull(skb, sizeof(*esph)))
339 goto out;
340
341 if (elen <= 0)
342 goto out;
343
344 if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
345 goto out;
346 nfrags = err;
347
348 err = -ENOMEM;
349 tmp = esp_alloc_tmp(aead, nfrags + 1);
350 if (!tmp)
351 goto out;
352
353 ESP_SKB_CB(skb)->tmp = tmp;
354 iv = esp_tmp_iv(aead, tmp);
355 req = esp_tmp_req(aead, iv);
356 asg = esp_req_sg(aead, req);
357 sg = asg + 1;
358
359 skb->ip_summed = CHECKSUM_NONE;
360
361 esph = (struct ip_esp_hdr *)skb->data;
362
363 /* Get ivec. This can be wrong, check against another impls. */
364 iv = esph->enc_data;
365
366 sg_init_table(sg, nfrags);
367 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
368 sg_init_one(asg, esph, sizeof(*esph));
369
370 aead_request_set_callback(req, 0, esp_input_done, skb);
371 aead_request_set_crypt(req, sg, sg, elen, iv);
372 aead_request_set_assoc(req, asg, sizeof(*esph));
373
374 err = crypto_aead_decrypt(req);
375 if (err == -EINPROGRESS)
376 goto out;
377
378 err = esp_input_done2(skb, err);
270 379
271out: 380out:
272 return -EINVAL; 381 return err;
273} 382}
274 383
275static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) 384static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
276{ 385{
277 struct esp_data *esp = x->data; 386 struct esp_data *esp = x->data;
278 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 387 u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
279 u32 align = max_t(u32, blksize, esp->conf.padlen); 388 u32 align = max_t(u32, blksize, esp->padlen);
280 u32 rem; 389 u32 rem;
281 390
282 mtu -= x->props.header_len + esp->auth.icv_trunc_len; 391 mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
283 rem = mtu & (align - 1); 392 rem = mtu & (align - 1);
284 mtu &= ~(align - 1); 393 mtu &= ~(align - 1);
285 394
@@ -326,80 +435,143 @@ static void esp_destroy(struct xfrm_state *x)
326 if (!esp) 435 if (!esp)
327 return; 436 return;
328 437
329 crypto_free_blkcipher(esp->conf.tfm); 438 crypto_free_aead(esp->aead);
330 esp->conf.tfm = NULL;
331 kfree(esp->conf.ivec);
332 esp->conf.ivec = NULL;
333 crypto_free_hash(esp->auth.tfm);
334 esp->auth.tfm = NULL;
335 kfree(esp->auth.work_icv);
336 esp->auth.work_icv = NULL;
337 kfree(esp); 439 kfree(esp);
338} 440}
339 441
340static int esp_init_state(struct xfrm_state *x) 442static int esp_init_aead(struct xfrm_state *x)
341{ 443{
342 struct esp_data *esp = NULL; 444 struct esp_data *esp = x->data;
343 struct crypto_blkcipher *tfm; 445 struct crypto_aead *aead;
344 u32 align; 446 int err;
447
448 aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
449 err = PTR_ERR(aead);
450 if (IS_ERR(aead))
451 goto error;
452
453 esp->aead = aead;
454
455 err = crypto_aead_setkey(aead, x->aead->alg_key,
456 (x->aead->alg_key_len + 7) / 8);
457 if (err)
458 goto error;
459
460 err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
461 if (err)
462 goto error;
463
464error:
465 return err;
466}
467
468static int esp_init_authenc(struct xfrm_state *x)
469{
470 struct esp_data *esp = x->data;
471 struct crypto_aead *aead;
472 struct crypto_authenc_key_param *param;
473 struct rtattr *rta;
474 char *key;
475 char *p;
476 char authenc_name[CRYPTO_MAX_ALG_NAME];
477 unsigned int keylen;
478 int err;
345 479
480 err = -EINVAL;
346 if (x->ealg == NULL) 481 if (x->ealg == NULL)
347 goto error; 482 goto error;
348 483
349 esp = kzalloc(sizeof(*esp), GFP_KERNEL); 484 err = -ENAMETOOLONG;
350 if (esp == NULL) 485 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
351 return -ENOMEM; 486 x->aalg ? x->aalg->alg_name : "digest_null",
487 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
488 goto error;
489
490 aead = crypto_alloc_aead(authenc_name, 0, 0);
491 err = PTR_ERR(aead);
492 if (IS_ERR(aead))
493 goto error;
494
495 esp->aead = aead;
496
497 keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
498 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
499 err = -ENOMEM;
500 key = kmalloc(keylen, GFP_KERNEL);
501 if (!key)
502 goto error;
503
504 p = key;
505 rta = (void *)p;
506 rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
507 rta->rta_len = RTA_LENGTH(sizeof(*param));
508 param = RTA_DATA(rta);
509 p += RTA_SPACE(sizeof(*param));
352 510
353 if (x->aalg) { 511 if (x->aalg) {
354 struct xfrm_algo_desc *aalg_desc; 512 struct xfrm_algo_desc *aalg_desc;
355 struct crypto_hash *hash;
356 513
357 hash = crypto_alloc_hash(x->aalg->alg_name, 0, 514 memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
358 CRYPTO_ALG_ASYNC); 515 p += (x->aalg->alg_key_len + 7) / 8;
359 if (IS_ERR(hash))
360 goto error;
361
362 esp->auth.tfm = hash;
363 if (crypto_hash_setkey(hash, x->aalg->alg_key,
364 (x->aalg->alg_key_len + 7) / 8))
365 goto error;
366 516
367 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 517 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
368 BUG_ON(!aalg_desc); 518 BUG_ON(!aalg_desc);
369 519
520 err = -EINVAL;
370 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 521 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
371 crypto_hash_digestsize(hash)) { 522 crypto_aead_authsize(aead)) {
372 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", 523 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
373 x->aalg->alg_name, 524 x->aalg->alg_name,
374 crypto_hash_digestsize(hash), 525 crypto_aead_authsize(aead),
375 aalg_desc->uinfo.auth.icv_fullbits/8); 526 aalg_desc->uinfo.auth.icv_fullbits/8);
376 goto error; 527 goto free_key;
377 } 528 }
378 529
379 esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; 530 err = crypto_aead_setauthsize(
380 esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; 531 aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
381 532 if (err)
382 esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); 533 goto free_key;
383 if (!esp->auth.work_icv)
384 goto error;
385 } 534 }
386 535
387 tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); 536 param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
388 if (IS_ERR(tfm)) 537 memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
389 goto error; 538
390 esp->conf.tfm = tfm; 539 err = crypto_aead_setkey(aead, key, keylen);
391 esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); 540
392 esp->conf.padlen = 0; 541free_key:
393 if (esp->conf.ivlen) { 542 kfree(key);
394 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); 543
395 if (unlikely(esp->conf.ivec == NULL)) 544error:
396 goto error; 545 return err;
397 esp->conf.ivinitted = 0; 546}
398 } 547
399 if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, 548static int esp_init_state(struct xfrm_state *x)
400 (x->ealg->alg_key_len + 7) / 8)) 549{
550 struct esp_data *esp;
551 struct crypto_aead *aead;
552 u32 align;
553 int err;
554
555 esp = kzalloc(sizeof(*esp), GFP_KERNEL);
556 if (esp == NULL)
557 return -ENOMEM;
558
559 x->data = esp;
560
561 if (x->aead)
562 err = esp_init_aead(x);
563 else
564 err = esp_init_authenc(x);
565
566 if (err)
401 goto error; 567 goto error;
402 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 568
569 aead = esp->aead;
570
571 esp->padlen = 0;
572
573 x->props.header_len = sizeof(struct ip_esp_hdr) +
574 crypto_aead_ivsize(aead);
403 if (x->props.mode == XFRM_MODE_TUNNEL) 575 if (x->props.mode == XFRM_MODE_TUNNEL)
404 x->props.header_len += sizeof(struct iphdr); 576 x->props.header_len += sizeof(struct iphdr);
405 else if (x->props.mode == XFRM_MODE_BEET) 577 else if (x->props.mode == XFRM_MODE_BEET)
@@ -418,21 +590,17 @@ static int esp_init_state(struct xfrm_state *x)
418 break; 590 break;
419 } 591 }
420 } 592 }
421 x->data = esp; 593
422 align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 594 align = ALIGN(crypto_aead_blocksize(aead), 4);
423 if (esp->conf.padlen) 595 if (esp->padlen)
424 align = max_t(u32, align, esp->conf.padlen); 596 align = max_t(u32, align, esp->padlen);
425 x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len; 597 x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
426 return 0;
427 598
428error: 599error:
429 x->data = esp; 600 return err;
430 esp_destroy(x);
431 x->data = NULL;
432 return -EINVAL;
433} 601}
434 602
435static struct xfrm_type esp_type = 603static const struct xfrm_type esp_type =
436{ 604{
437 .description = "ESP4", 605 .description = "ESP4",
438 .owner = THIS_MODULE, 606 .owner = THIS_MODULE,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 60123905dbbf..86ff2711fc95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -47,52 +47,65 @@
47#include <net/ip_fib.h> 47#include <net/ip_fib.h>
48#include <net/rtnetlink.h> 48#include <net/rtnetlink.h>
49 49
50#define FFprint(a...) printk(KERN_DEBUG a) 50#ifndef CONFIG_IP_MULTIPLE_TABLES
51 51
52static struct sock *fibnl; 52static int __net_init fib4_rules_init(struct net *net)
53{
54 struct fib_table *local_table, *main_table;
53 55
54#ifndef CONFIG_IP_MULTIPLE_TABLES 56 local_table = fib_hash_table(RT_TABLE_LOCAL);
57 if (local_table == NULL)
58 return -ENOMEM;
55 59
56struct fib_table *ip_fib_local_table; 60 main_table = fib_hash_table(RT_TABLE_MAIN);
57struct fib_table *ip_fib_main_table; 61 if (main_table == NULL)
62 goto fail;
58 63
59#define FIB_TABLE_HASHSZ 1 64 hlist_add_head_rcu(&local_table->tb_hlist,
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66 hlist_add_head_rcu(&main_table->tb_hlist,
67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68 return 0;
61 69
70fail:
71 kfree(local_table);
72 return -ENOMEM;
73}
62#else 74#else
63 75
64#define FIB_TABLE_HASHSZ 256 76struct fib_table *fib_new_table(struct net *net, u32 id)
65static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
66
67struct fib_table *fib_new_table(u32 id)
68{ 77{
69 struct fib_table *tb; 78 struct fib_table *tb;
70 unsigned int h; 79 unsigned int h;
71 80
72 if (id == 0) 81 if (id == 0)
73 id = RT_TABLE_MAIN; 82 id = RT_TABLE_MAIN;
74 tb = fib_get_table(id); 83 tb = fib_get_table(net, id);
75 if (tb) 84 if (tb)
76 return tb; 85 return tb;
77 tb = fib_hash_init(id); 86
87 tb = fib_hash_table(id);
78 if (!tb) 88 if (!tb)
79 return NULL; 89 return NULL;
80 h = id & (FIB_TABLE_HASHSZ - 1); 90 h = id & (FIB_TABLE_HASHSZ - 1);
81 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); 91 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
82 return tb; 92 return tb;
83} 93}
84 94
85struct fib_table *fib_get_table(u32 id) 95struct fib_table *fib_get_table(struct net *net, u32 id)
86{ 96{
87 struct fib_table *tb; 97 struct fib_table *tb;
88 struct hlist_node *node; 98 struct hlist_node *node;
99 struct hlist_head *head;
89 unsigned int h; 100 unsigned int h;
90 101
91 if (id == 0) 102 if (id == 0)
92 id = RT_TABLE_MAIN; 103 id = RT_TABLE_MAIN;
93 h = id & (FIB_TABLE_HASHSZ - 1); 104 h = id & (FIB_TABLE_HASHSZ - 1);
105
94 rcu_read_lock(); 106 rcu_read_lock();
95 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { 107 head = &net->ipv4.fib_table_hash[h];
108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
96 if (tb->tb_id == id) { 109 if (tb->tb_id == id) {
97 rcu_read_unlock(); 110 rcu_read_unlock();
98 return tb; 111 return tb;
@@ -103,15 +116,32 @@ struct fib_table *fib_get_table(u32 id)
103} 116}
104#endif /* CONFIG_IP_MULTIPLE_TABLES */ 117#endif /* CONFIG_IP_MULTIPLE_TABLES */
105 118
106static void fib_flush(void) 119void fib_select_default(struct net *net,
120 const struct flowi *flp, struct fib_result *res)
121{
122 struct fib_table *tb;
123 int table = RT_TABLE_MAIN;
124#ifdef CONFIG_IP_MULTIPLE_TABLES
125 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
126 return;
127 table = res->r->table;
128#endif
129 tb = fib_get_table(net, table);
130 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
131 tb->tb_select_default(tb, flp, res);
132}
133
134static void fib_flush(struct net *net)
107{ 135{
108 int flushed = 0; 136 int flushed = 0;
109 struct fib_table *tb; 137 struct fib_table *tb;
110 struct hlist_node *node; 138 struct hlist_node *node;
139 struct hlist_head *head;
111 unsigned int h; 140 unsigned int h;
112 141
113 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 142 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) 143 head = &net->ipv4.fib_table_hash[h];
144 hlist_for_each_entry(tb, node, head, tb_hlist)
115 flushed += tb->tb_flush(tb); 145 flushed += tb->tb_flush(tb);
116 } 146 }
117 147
@@ -123,7 +153,7 @@ static void fib_flush(void)
123 * Find the first device with a given source address. 153 * Find the first device with a given source address.
124 */ 154 */
125 155
126struct net_device * ip_dev_find(__be32 addr) 156struct net_device * ip_dev_find(struct net *net, __be32 addr)
127{ 157{
128 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 158 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 struct fib_result res; 159 struct fib_result res;
@@ -134,7 +164,7 @@ struct net_device * ip_dev_find(__be32 addr)
134 res.r = NULL; 164 res.r = NULL;
135#endif 165#endif
136 166
137 local_table = fib_get_table(RT_TABLE_LOCAL); 167 local_table = fib_get_table(net, RT_TABLE_LOCAL);
138 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 168 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
139 return NULL; 169 return NULL;
140 if (res.type != RTN_LOCAL) 170 if (res.type != RTN_LOCAL)
@@ -148,33 +178,51 @@ out:
148 return dev; 178 return dev;
149} 179}
150 180
151unsigned inet_addr_type(__be32 addr) 181/*
182 * Find address type as if only "dev" was present in the system. If
183 * on_dev is NULL then all interfaces are taken into consideration.
184 */
185static inline unsigned __inet_dev_addr_type(struct net *net,
186 const struct net_device *dev,
187 __be32 addr)
152{ 188{
153 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 189 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
154 struct fib_result res; 190 struct fib_result res;
155 unsigned ret = RTN_BROADCAST; 191 unsigned ret = RTN_BROADCAST;
156 struct fib_table *local_table; 192 struct fib_table *local_table;
157 193
158 if (ZERONET(addr) || BADCLASS(addr)) 194 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
159 return RTN_BROADCAST; 195 return RTN_BROADCAST;
160 if (MULTICAST(addr)) 196 if (ipv4_is_multicast(addr))
161 return RTN_MULTICAST; 197 return RTN_MULTICAST;
162 198
163#ifdef CONFIG_IP_MULTIPLE_TABLES 199#ifdef CONFIG_IP_MULTIPLE_TABLES
164 res.r = NULL; 200 res.r = NULL;
165#endif 201#endif
166 202
167 local_table = fib_get_table(RT_TABLE_LOCAL); 203 local_table = fib_get_table(net, RT_TABLE_LOCAL);
168 if (local_table) { 204 if (local_table) {
169 ret = RTN_UNICAST; 205 ret = RTN_UNICAST;
170 if (!local_table->tb_lookup(local_table, &fl, &res)) { 206 if (!local_table->tb_lookup(local_table, &fl, &res)) {
171 ret = res.type; 207 if (!dev || dev == res.fi->fib_dev)
208 ret = res.type;
172 fib_res_put(&res); 209 fib_res_put(&res);
173 } 210 }
174 } 211 }
175 return ret; 212 return ret;
176} 213}
177 214
215unsigned int inet_addr_type(struct net *net, __be32 addr)
216{
217 return __inet_dev_addr_type(net, NULL, addr);
218}
219
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
222{
223 return __inet_dev_addr_type(net, dev, addr);
224}
225
178/* Given (packet source, input interface) and optional (dst, oif, tos): 226/* Given (packet source, input interface) and optional (dst, oif, tos):
179 - (main) check, that source is valid i.e. not broadcast or our local 227 - (main) check, that source is valid i.e. not broadcast or our local
180 address. 228 address.
@@ -195,6 +243,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
195 struct fib_result res; 243 struct fib_result res;
196 int no_addr, rpf; 244 int no_addr, rpf;
197 int ret; 245 int ret;
246 struct net *net;
198 247
199 no_addr = rpf = 0; 248 no_addr = rpf = 0;
200 rcu_read_lock(); 249 rcu_read_lock();
@@ -208,7 +257,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
208 if (in_dev == NULL) 257 if (in_dev == NULL)
209 goto e_inval; 258 goto e_inval;
210 259
211 if (fib_lookup(&fl, &res)) 260 net = dev->nd_net;
261 if (fib_lookup(net, &fl, &res))
212 goto last_resort; 262 goto last_resort;
213 if (res.type != RTN_UNICAST) 263 if (res.type != RTN_UNICAST)
214 goto e_inval_res; 264 goto e_inval_res;
@@ -232,7 +282,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232 fl.oif = dev->ifindex; 282 fl.oif = dev->ifindex;
233 283
234 ret = 0; 284 ret = 0;
235 if (fib_lookup(&fl, &res) == 0) { 285 if (fib_lookup(net, &fl, &res) == 0) {
236 if (res.type == RTN_UNICAST) { 286 if (res.type == RTN_UNICAST) {
237 *spec_dst = FIB_RES_PREFSRC(res); 287 *spec_dst = FIB_RES_PREFSRC(res);
238 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 288 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
@@ -271,13 +321,14 @@ static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
271 return len + nla_total_size(4); 321 return len + nla_total_size(4);
272} 322}
273 323
274static int rtentry_to_fib_config(int cmd, struct rtentry *rt, 324static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
275 struct fib_config *cfg) 325 struct fib_config *cfg)
276{ 326{
277 __be32 addr; 327 __be32 addr;
278 int plen; 328 int plen;
279 329
280 memset(cfg, 0, sizeof(*cfg)); 330 memset(cfg, 0, sizeof(*cfg));
331 cfg->fc_nlinfo.nl_net = net;
281 332
282 if (rt->rt_dst.sa_family != AF_INET) 333 if (rt->rt_dst.sa_family != AF_INET)
283 return -EAFNOSUPPORT; 334 return -EAFNOSUPPORT;
@@ -338,7 +389,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
338 colon = strchr(devname, ':'); 389 colon = strchr(devname, ':');
339 if (colon) 390 if (colon)
340 *colon = 0; 391 *colon = 0;
341 dev = __dev_get_by_name(&init_net, devname); 392 dev = __dev_get_by_name(net, devname);
342 if (!dev) 393 if (!dev)
343 return -ENODEV; 394 return -ENODEV;
344 cfg->fc_oif = dev->ifindex; 395 cfg->fc_oif = dev->ifindex;
@@ -361,7 +412,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
361 if (rt->rt_gateway.sa_family == AF_INET && addr) { 412 if (rt->rt_gateway.sa_family == AF_INET && addr) {
362 cfg->fc_gw = addr; 413 cfg->fc_gw = addr;
363 if (rt->rt_flags & RTF_GATEWAY && 414 if (rt->rt_flags & RTF_GATEWAY &&
364 inet_addr_type(addr) == RTN_UNICAST) 415 inet_addr_type(net, addr) == RTN_UNICAST)
365 cfg->fc_scope = RT_SCOPE_UNIVERSE; 416 cfg->fc_scope = RT_SCOPE_UNIVERSE;
366 } 417 }
367 418
@@ -402,7 +453,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
402 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 453 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
403 */ 454 */
404 455
405int ip_rt_ioctl(unsigned int cmd, void __user *arg) 456int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
406{ 457{
407 struct fib_config cfg; 458 struct fib_config cfg;
408 struct rtentry rt; 459 struct rtentry rt;
@@ -418,18 +469,18 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
418 return -EFAULT; 469 return -EFAULT;
419 470
420 rtnl_lock(); 471 rtnl_lock();
421 err = rtentry_to_fib_config(cmd, &rt, &cfg); 472 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
422 if (err == 0) { 473 if (err == 0) {
423 struct fib_table *tb; 474 struct fib_table *tb;
424 475
425 if (cmd == SIOCDELRT) { 476 if (cmd == SIOCDELRT) {
426 tb = fib_get_table(cfg.fc_table); 477 tb = fib_get_table(net, cfg.fc_table);
427 if (tb) 478 if (tb)
428 err = tb->tb_delete(tb, &cfg); 479 err = tb->tb_delete(tb, &cfg);
429 else 480 else
430 err = -ESRCH; 481 err = -ESRCH;
431 } else { 482 } else {
432 tb = fib_new_table(cfg.fc_table); 483 tb = fib_new_table(net, cfg.fc_table);
433 if (tb) 484 if (tb)
434 err = tb->tb_insert(tb, &cfg); 485 err = tb->tb_insert(tb, &cfg);
435 else 486 else
@@ -459,8 +510,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
459 [RTA_FLOW] = { .type = NLA_U32 }, 510 [RTA_FLOW] = { .type = NLA_U32 },
460}; 511};
461 512
462static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, 513static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
463 struct fib_config *cfg) 514 struct nlmsghdr *nlh, struct fib_config *cfg)
464{ 515{
465 struct nlattr *attr; 516 struct nlattr *attr;
466 int err, remaining; 517 int err, remaining;
@@ -484,6 +535,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
484 535
485 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 536 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
486 cfg->fc_nlinfo.nlh = nlh; 537 cfg->fc_nlinfo.nlh = nlh;
538 cfg->fc_nlinfo.nl_net = net;
487 539
488 if (cfg->fc_type > RTN_MAX) { 540 if (cfg->fc_type > RTN_MAX) {
489 err = -EINVAL; 541 err = -EINVAL;
@@ -531,15 +583,16 @@ errout:
531 583
532static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 584static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
533{ 585{
586 struct net *net = skb->sk->sk_net;
534 struct fib_config cfg; 587 struct fib_config cfg;
535 struct fib_table *tb; 588 struct fib_table *tb;
536 int err; 589 int err;
537 590
538 err = rtm_to_fib_config(skb, nlh, &cfg); 591 err = rtm_to_fib_config(net, skb, nlh, &cfg);
539 if (err < 0) 592 if (err < 0)
540 goto errout; 593 goto errout;
541 594
542 tb = fib_get_table(cfg.fc_table); 595 tb = fib_get_table(net, cfg.fc_table);
543 if (tb == NULL) { 596 if (tb == NULL) {
544 err = -ESRCH; 597 err = -ESRCH;
545 goto errout; 598 goto errout;
@@ -552,15 +605,16 @@ errout:
552 605
553static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 606static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
554{ 607{
608 struct net *net = skb->sk->sk_net;
555 struct fib_config cfg; 609 struct fib_config cfg;
556 struct fib_table *tb; 610 struct fib_table *tb;
557 int err; 611 int err;
558 612
559 err = rtm_to_fib_config(skb, nlh, &cfg); 613 err = rtm_to_fib_config(net, skb, nlh, &cfg);
560 if (err < 0) 614 if (err < 0)
561 goto errout; 615 goto errout;
562 616
563 tb = fib_new_table(cfg.fc_table); 617 tb = fib_new_table(net, cfg.fc_table);
564 if (tb == NULL) { 618 if (tb == NULL) {
565 err = -ENOBUFS; 619 err = -ENOBUFS;
566 goto errout; 620 goto errout;
@@ -573,10 +627,12 @@ errout:
573 627
574static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 628static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
575{ 629{
630 struct net *net = skb->sk->sk_net;
576 unsigned int h, s_h; 631 unsigned int h, s_h;
577 unsigned int e = 0, s_e; 632 unsigned int e = 0, s_e;
578 struct fib_table *tb; 633 struct fib_table *tb;
579 struct hlist_node *node; 634 struct hlist_node *node;
635 struct hlist_head *head;
580 int dumped = 0; 636 int dumped = 0;
581 637
582 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 638 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
@@ -588,7 +644,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
588 644
589 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 645 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590 e = 0; 646 e = 0;
591 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { 647 head = &net->ipv4.fib_table_hash[h];
648 hlist_for_each_entry(tb, node, head, tb_hlist) {
592 if (e < s_e) 649 if (e < s_e)
593 goto next; 650 goto next;
594 if (dumped) 651 if (dumped)
@@ -617,6 +674,7 @@ out:
617 674
618static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 675static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
619{ 676{
677 struct net *net = ifa->ifa_dev->dev->nd_net;
620 struct fib_table *tb; 678 struct fib_table *tb;
621 struct fib_config cfg = { 679 struct fib_config cfg = {
622 .fc_protocol = RTPROT_KERNEL, 680 .fc_protocol = RTPROT_KERNEL,
@@ -626,12 +684,15 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
626 .fc_prefsrc = ifa->ifa_local, 684 .fc_prefsrc = ifa->ifa_local,
627 .fc_oif = ifa->ifa_dev->dev->ifindex, 685 .fc_oif = ifa->ifa_dev->dev->ifindex,
628 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 686 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
687 .fc_nlinfo = {
688 .nl_net = net,
689 },
629 }; 690 };
630 691
631 if (type == RTN_UNICAST) 692 if (type == RTN_UNICAST)
632 tb = fib_new_table(RT_TABLE_MAIN); 693 tb = fib_new_table(net, RT_TABLE_MAIN);
633 else 694 else
634 tb = fib_new_table(RT_TABLE_LOCAL); 695 tb = fib_new_table(net, RT_TABLE_LOCAL);
635 696
636 if (tb == NULL) 697 if (tb == NULL)
637 return; 698 return;
@@ -661,7 +722,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
661 if (ifa->ifa_flags&IFA_F_SECONDARY) { 722 if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 prim = inet_ifa_byprefix(in_dev, prefix, mask); 723 prim = inet_ifa_byprefix(in_dev, prefix, mask);
663 if (prim == NULL) { 724 if (prim == NULL) {
664 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n"); 725 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
665 return; 726 return;
666 } 727 }
667 } 728 }
@@ -675,7 +736,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
675 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 736 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
676 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 737 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677 738
678 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 739 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 (prefix != addr || ifa->ifa_prefixlen < 32)) { 740 (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 741 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 742 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
@@ -708,7 +769,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
708 else { 769 else {
709 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 770 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
710 if (prim == NULL) { 771 if (prim == NULL) {
711 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n"); 772 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
712 return; 773 return;
713 } 774 }
714 } 775 }
@@ -740,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
740 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 801 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
741 802
742 /* Check, that this local address finally disappeared. */ 803 /* Check, that this local address finally disappeared. */
743 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { 804 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
744 /* And the last, but not the least thing. 805 /* And the last, but not the least thing.
745 We must flush stray FIB entries. 806 We must flush stray FIB entries.
746 807
747 First of all, we scan fib_info list searching 808 First of all, we scan fib_info list searching
748 for stray nexthop entries, then ignite fib_flush. 809 for stray nexthop entries, then ignite fib_flush.
749 */ 810 */
750 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 811 if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
751 fib_flush(); 812 fib_flush(dev->nd_net);
752 } 813 }
753 } 814 }
754#undef LOCAL_OK 815#undef LOCAL_OK
@@ -790,39 +851,55 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
790 851
791static void nl_fib_input(struct sk_buff *skb) 852static void nl_fib_input(struct sk_buff *skb)
792{ 853{
854 struct net *net;
793 struct fib_result_nl *frn; 855 struct fib_result_nl *frn;
794 struct nlmsghdr *nlh; 856 struct nlmsghdr *nlh;
795 struct fib_table *tb; 857 struct fib_table *tb;
796 u32 pid; 858 u32 pid;
797 859
860 net = skb->sk->sk_net;
798 nlh = nlmsg_hdr(skb); 861 nlh = nlmsg_hdr(skb);
799 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 862 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
800 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { 863 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
801 kfree_skb(skb);
802 return; 864 return;
803 } 865
866 skb = skb_clone(skb, GFP_KERNEL);
867 if (skb == NULL)
868 return;
869 nlh = nlmsg_hdr(skb);
804 870
805 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 871 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
806 tb = fib_get_table(frn->tb_id_in); 872 tb = fib_get_table(net, frn->tb_id_in);
807 873
808 nl_fib_lookup(frn, tb); 874 nl_fib_lookup(frn, tb);
809 875
810 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 876 pid = NETLINK_CB(skb).pid; /* pid of sending process */
811 NETLINK_CB(skb).pid = 0; /* from kernel */ 877 NETLINK_CB(skb).pid = 0; /* from kernel */
812 NETLINK_CB(skb).dst_group = 0; /* unicast */ 878 NETLINK_CB(skb).dst_group = 0; /* unicast */
813 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); 879 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
880}
881
882static int nl_fib_lookup_init(struct net *net)
883{
884 struct sock *sk;
885 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
886 nl_fib_input, NULL, THIS_MODULE);
887 if (sk == NULL)
888 return -EAFNOSUPPORT;
889 net->ipv4.fibnl = sk;
890 return 0;
814} 891}
815 892
816static void nl_fib_lookup_init(void) 893static void nl_fib_lookup_exit(struct net *net)
817{ 894{
818 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, 895 netlink_kernel_release(net->ipv4.fibnl);
819 nl_fib_input, NULL, THIS_MODULE); 896 net->ipv4.fibnl = NULL;
820} 897}
821 898
822static void fib_disable_ip(struct net_device *dev, int force) 899static void fib_disable_ip(struct net_device *dev, int force)
823{ 900{
824 if (fib_sync_down(0, dev, force)) 901 if (fib_sync_down_dev(dev, force))
825 fib_flush(); 902 fib_flush(dev->nd_net);
826 rt_cache_flush(0); 903 rt_cache_flush(0);
827 arp_ifdown(dev); 904 arp_ifdown(dev);
828} 905}
@@ -859,9 +936,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
859 struct net_device *dev = ptr; 936 struct net_device *dev = ptr;
860 struct in_device *in_dev = __in_dev_get_rtnl(dev); 937 struct in_device *in_dev = __in_dev_get_rtnl(dev);
861 938
862 if (dev->nd_net != &init_net)
863 return NOTIFY_DONE;
864
865 if (event == NETDEV_UNREGISTER) { 939 if (event == NETDEV_UNREGISTER) {
866 fib_disable_ip(dev, 2); 940 fib_disable_ip(dev, 2);
867 return NOTIFY_DONE; 941 return NOTIFY_DONE;
@@ -899,29 +973,100 @@ static struct notifier_block fib_netdev_notifier = {
899 .notifier_call =fib_netdev_event, 973 .notifier_call =fib_netdev_event,
900}; 974};
901 975
902void __init ip_fib_init(void) 976static int __net_init ip_fib_net_init(struct net *net)
903{ 977{
978 int err;
904 unsigned int i; 979 unsigned int i;
905 980
981 net->ipv4.fib_table_hash = kzalloc(
982 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
983 if (net->ipv4.fib_table_hash == NULL)
984 return -ENOMEM;
985
906 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 986 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
907 INIT_HLIST_HEAD(&fib_table_hash[i]); 987 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
908#ifndef CONFIG_IP_MULTIPLE_TABLES 988
909 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 989 err = fib4_rules_init(net);
910 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); 990 if (err < 0)
911 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); 991 goto fail;
912 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); 992 return 0;
913#else 993
914 fib4_rules_init(); 994fail:
995 kfree(net->ipv4.fib_table_hash);
996 return err;
997}
998
999static void __net_exit ip_fib_net_exit(struct net *net)
1000{
1001 unsigned int i;
1002
1003#ifdef CONFIG_IP_MULTIPLE_TABLES
1004 fib4_rules_exit(net);
915#endif 1005#endif
916 1006
917 register_netdevice_notifier(&fib_netdev_notifier); 1007 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
918 register_inetaddr_notifier(&fib_inetaddr_notifier); 1008 struct fib_table *tb;
919 nl_fib_lookup_init(); 1009 struct hlist_head *head;
1010 struct hlist_node *node, *tmp;
1011
1012 head = &net->ipv4.fib_table_hash[i];
1013 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1014 hlist_del(node);
1015 tb->tb_flush(tb);
1016 kfree(tb);
1017 }
1018 }
1019 kfree(net->ipv4.fib_table_hash);
1020}
920 1021
1022static int __net_init fib_net_init(struct net *net)
1023{
1024 int error;
1025
1026 error = ip_fib_net_init(net);
1027 if (error < 0)
1028 goto out;
1029 error = nl_fib_lookup_init(net);
1030 if (error < 0)
1031 goto out_nlfl;
1032 error = fib_proc_init(net);
1033 if (error < 0)
1034 goto out_proc;
1035out:
1036 return error;
1037
1038out_proc:
1039 nl_fib_lookup_exit(net);
1040out_nlfl:
1041 ip_fib_net_exit(net);
1042 goto out;
1043}
1044
1045static void __net_exit fib_net_exit(struct net *net)
1046{
1047 fib_proc_exit(net);
1048 nl_fib_lookup_exit(net);
1049 ip_fib_net_exit(net);
1050}
1051
1052static struct pernet_operations fib_net_ops = {
1053 .init = fib_net_init,
1054 .exit = fib_net_exit,
1055};
1056
1057void __init ip_fib_init(void)
1058{
921 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1059 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
922 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1060 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
923 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1061 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1062
1063 register_pernet_subsys(&fib_net_ops);
1064 register_netdevice_notifier(&fib_netdev_notifier);
1065 register_inetaddr_notifier(&fib_inetaddr_notifier);
1066
1067 fib_hash_init();
924} 1068}
925 1069
926EXPORT_SYMBOL(inet_addr_type); 1070EXPORT_SYMBOL(inet_addr_type);
1071EXPORT_SYMBOL(inet_dev_addr_type);
927EXPORT_SYMBOL(ip_dev_find); 1072EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 527a6e0af5b6..76b9c684cccd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -52,6 +52,7 @@ struct fib_node {
52 struct hlist_node fn_hash; 52 struct hlist_node fn_hash;
53 struct list_head fn_alias; 53 struct list_head fn_alias;
54 __be32 fn_key; 54 __be32 fn_key;
55 struct fib_alias fn_embedded_alias;
55}; 56};
56 57
57struct fn_zone { 58struct fn_zone {
@@ -102,10 +103,10 @@ static struct hlist_head *fz_hash_alloc(int divisor)
102 unsigned long size = divisor * sizeof(struct hlist_head); 103 unsigned long size = divisor * sizeof(struct hlist_head);
103 104
104 if (size <= PAGE_SIZE) { 105 if (size <= PAGE_SIZE) {
105 return kmalloc(size, GFP_KERNEL); 106 return kzalloc(size, GFP_KERNEL);
106 } else { 107 } else {
107 return (struct hlist_head *) 108 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL, get_order(size)); 109 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109 } 110 }
110} 111}
111 112
@@ -168,14 +169,13 @@ static void fn_rehash_zone(struct fn_zone *fz)
168 new_hashmask = (new_divisor - 1); 169 new_hashmask = (new_divisor - 1);
169 170
170#if RT_CACHE_DEBUG >= 2 171#if RT_CACHE_DEBUG >= 2
171 printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); 172 printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
173 fz->fz_order, old_divisor);
172#endif 174#endif
173 175
174 ht = fz_hash_alloc(new_divisor); 176 ht = fz_hash_alloc(new_divisor);
175 177
176 if (ht) { 178 if (ht) {
177 memset(ht, 0, new_divisor * sizeof(struct hlist_head));
178
179 write_lock_bh(&fib_hash_lock); 179 write_lock_bh(&fib_hash_lock);
180 old_ht = fz->fz_hash; 180 old_ht = fz->fz_hash;
181 fz->fz_hash = ht; 181 fz->fz_hash = ht;
@@ -194,10 +194,13 @@ static inline void fn_free_node(struct fib_node * f)
194 kmem_cache_free(fn_hash_kmem, f); 194 kmem_cache_free(fn_hash_kmem, f);
195} 195}
196 196
197static inline void fn_free_alias(struct fib_alias *fa) 197static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
198{ 198{
199 fib_release_info(fa->fa_info); 199 fib_release_info(fa->fa_info);
200 kmem_cache_free(fn_alias_kmem, fa); 200 if (fa == &f->fn_embedded_alias)
201 fa->fa_info = NULL;
202 else
203 kmem_cache_free(fn_alias_kmem, fa);
201} 204}
202 205
203static struct fn_zone * 206static struct fn_zone *
@@ -219,7 +222,6 @@ fn_new_zone(struct fn_hash *table, int z)
219 kfree(fz); 222 kfree(fz);
220 return NULL; 223 return NULL;
221 } 224 }
222 memset(fz->fz_hash, 0, fz->fz_divisor * sizeof(struct hlist_head *));
223 fz->fz_order = z; 225 fz->fz_order = z;
224 fz->fz_mask = inet_make_mask(z); 226 fz->fz_mask = inet_make_mask(z);
225 227
@@ -275,8 +277,6 @@ out:
275 return err; 277 return err;
276} 278}
277 279
278static int fn_hash_last_dflt=-1;
279
280static void 280static void
281fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 281fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
282{ 282{
@@ -317,12 +317,9 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
317 if (next_fi != res->fi) 317 if (next_fi != res->fi)
318 break; 318 break;
319 } else if (!fib_detect_death(fi, order, &last_resort, 319 } else if (!fib_detect_death(fi, order, &last_resort,
320 &last_idx, &fn_hash_last_dflt)) { 320 &last_idx, tb->tb_default)) {
321 if (res->fi) 321 fib_result_assign(res, fi);
322 fib_info_put(res->fi); 322 tb->tb_default = order;
323 res->fi = fi;
324 atomic_inc(&fi->fib_clntref);
325 fn_hash_last_dflt = order;
326 goto out; 323 goto out;
327 } 324 }
328 fi = next_fi; 325 fi = next_fi;
@@ -331,27 +328,20 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
331 } 328 }
332 329
333 if (order <= 0 || fi == NULL) { 330 if (order <= 0 || fi == NULL) {
334 fn_hash_last_dflt = -1; 331 tb->tb_default = -1;
335 goto out; 332 goto out;
336 } 333 }
337 334
338 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { 335 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
339 if (res->fi) 336 tb->tb_default)) {
340 fib_info_put(res->fi); 337 fib_result_assign(res, fi);
341 res->fi = fi; 338 tb->tb_default = order;
342 atomic_inc(&fi->fib_clntref);
343 fn_hash_last_dflt = order;
344 goto out; 339 goto out;
345 } 340 }
346 341
347 if (last_idx >= 0) { 342 if (last_idx >= 0)
348 if (res->fi) 343 fib_result_assign(res, last_resort);
349 fib_info_put(res->fi); 344 tb->tb_default = last_idx;
350 res->fi = last_resort;
351 if (last_resort)
352 atomic_inc(&last_resort->fib_clntref);
353 }
354 fn_hash_last_dflt = last_idx;
355out: 345out:
356 read_unlock(&fib_hash_lock); 346 read_unlock(&fib_hash_lock);
357} 347}
@@ -434,16 +424,43 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
434 424
435 if (fa && fa->fa_tos == tos && 425 if (fa && fa->fa_tos == tos &&
436 fa->fa_info->fib_priority == fi->fib_priority) { 426 fa->fa_info->fib_priority == fi->fib_priority) {
437 struct fib_alias *fa_orig; 427 struct fib_alias *fa_first, *fa_match;
438 428
439 err = -EEXIST; 429 err = -EEXIST;
440 if (cfg->fc_nlflags & NLM_F_EXCL) 430 if (cfg->fc_nlflags & NLM_F_EXCL)
441 goto out; 431 goto out;
442 432
433 /* We have 2 goals:
434 * 1. Find exact match for type, scope, fib_info to avoid
435 * duplicate routes
436 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
437 */
438 fa_match = NULL;
439 fa_first = fa;
440 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
441 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
442 if (fa->fa_tos != tos)
443 break;
444 if (fa->fa_info->fib_priority != fi->fib_priority)
445 break;
446 if (fa->fa_type == cfg->fc_type &&
447 fa->fa_scope == cfg->fc_scope &&
448 fa->fa_info == fi) {
449 fa_match = fa;
450 break;
451 }
452 }
453
443 if (cfg->fc_nlflags & NLM_F_REPLACE) { 454 if (cfg->fc_nlflags & NLM_F_REPLACE) {
444 struct fib_info *fi_drop; 455 struct fib_info *fi_drop;
445 u8 state; 456 u8 state;
446 457
458 fa = fa_first;
459 if (fa_match) {
460 if (fa == fa_match)
461 err = 0;
462 goto out;
463 }
447 write_lock_bh(&fib_hash_lock); 464 write_lock_bh(&fib_hash_lock);
448 fi_drop = fa->fa_info; 465 fi_drop = fa->fa_info;
449 fa->fa_info = fi; 466 fa->fa_info = fi;
@@ -466,20 +483,11 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
466 * uses the same scope, type, and nexthop 483 * uses the same scope, type, and nexthop
467 * information. 484 * information.
468 */ 485 */
469 fa_orig = fa; 486 if (fa_match)
470 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 487 goto out;
471 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { 488
472 if (fa->fa_tos != tos)
473 break;
474 if (fa->fa_info->fib_priority != fi->fib_priority)
475 break;
476 if (fa->fa_type == cfg->fc_type &&
477 fa->fa_scope == cfg->fc_scope &&
478 fa->fa_info == fi)
479 goto out;
480 }
481 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 489 if (!(cfg->fc_nlflags & NLM_F_APPEND))
482 fa = fa_orig; 490 fa = fa_first;
483 } 491 }
484 492
485 err = -ENOENT; 493 err = -ENOENT;
@@ -487,15 +495,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
487 goto out; 495 goto out;
488 496
489 err = -ENOBUFS; 497 err = -ENOBUFS;
490 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
491 if (new_fa == NULL)
492 goto out;
493 498
494 new_f = NULL; 499 new_f = NULL;
495 if (!f) { 500 if (!f) {
496 new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL); 501 new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
497 if (new_f == NULL) 502 if (new_f == NULL)
498 goto out_free_new_fa; 503 goto out;
499 504
500 INIT_HLIST_NODE(&new_f->fn_hash); 505 INIT_HLIST_NODE(&new_f->fn_hash);
501 INIT_LIST_HEAD(&new_f->fn_alias); 506 INIT_LIST_HEAD(&new_f->fn_alias);
@@ -503,6 +508,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
503 f = new_f; 508 f = new_f;
504 } 509 }
505 510
511 new_fa = &f->fn_embedded_alias;
512 if (new_fa->fa_info != NULL) {
513 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
514 if (new_fa == NULL)
515 goto out_free_new_f;
516 }
506 new_fa->fa_info = fi; 517 new_fa->fa_info = fi;
507 new_fa->fa_tos = tos; 518 new_fa->fa_tos = tos;
508 new_fa->fa_type = cfg->fc_type; 519 new_fa->fa_type = cfg->fc_type;
@@ -529,8 +540,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
529 &cfg->fc_nlinfo, 0); 540 &cfg->fc_nlinfo, 0);
530 return 0; 541 return 0;
531 542
532out_free_new_fa: 543out_free_new_f:
533 kmem_cache_free(fn_alias_kmem, new_fa); 544 kmem_cache_free(fn_hash_kmem, new_f);
534out: 545out:
535 fib_release_info(fi); 546 fib_release_info(fi);
536 return err; 547 return err;
@@ -606,7 +617,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
606 617
607 if (fa->fa_state & FA_S_ACCESSED) 618 if (fa->fa_state & FA_S_ACCESSED)
608 rt_cache_flush(-1); 619 rt_cache_flush(-1);
609 fn_free_alias(fa); 620 fn_free_alias(fa, f);
610 if (kill_fn) { 621 if (kill_fn) {
611 fn_free_node(f); 622 fn_free_node(f);
612 fz->fz_nent--; 623 fz->fz_nent--;
@@ -642,7 +653,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
642 fib_hash_genid++; 653 fib_hash_genid++;
643 write_unlock_bh(&fib_hash_lock); 654 write_unlock_bh(&fib_hash_lock);
644 655
645 fn_free_alias(fa); 656 fn_free_alias(fa, f);
646 found++; 657 found++;
647 } 658 }
648 } 659 }
@@ -718,19 +729,18 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
718{ 729{
719 int h, s_h; 730 int h, s_h;
720 731
732 if (fz->fz_hash == NULL)
733 return skb->len;
721 s_h = cb->args[3]; 734 s_h = cb->args[3];
722 for (h=0; h < fz->fz_divisor; h++) { 735 for (h = s_h; h < fz->fz_divisor; h++) {
723 if (h < s_h) continue; 736 if (hlist_empty(&fz->fz_hash[h]))
724 if (h > s_h)
725 memset(&cb->args[4], 0,
726 sizeof(cb->args) - 4*sizeof(cb->args[0]));
727 if (fz->fz_hash == NULL ||
728 hlist_empty(&fz->fz_hash[h]))
729 continue; 737 continue;
730 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { 738 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
731 cb->args[3] = h; 739 cb->args[3] = h;
732 return -1; 740 return -1;
733 } 741 }
742 memset(&cb->args[4], 0,
743 sizeof(cb->args) - 4*sizeof(cb->args[0]));
734 } 744 }
735 cb->args[3] = h; 745 cb->args[3] = h;
736 return skb->len; 746 return skb->len;
@@ -746,39 +756,32 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
746 read_lock(&fib_hash_lock); 756 read_lock(&fib_hash_lock);
747 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { 757 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
748 if (m < s_m) continue; 758 if (m < s_m) continue;
749 if (m > s_m)
750 memset(&cb->args[3], 0,
751 sizeof(cb->args) - 3*sizeof(cb->args[0]));
752 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { 759 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
753 cb->args[2] = m; 760 cb->args[2] = m;
754 read_unlock(&fib_hash_lock); 761 read_unlock(&fib_hash_lock);
755 return -1; 762 return -1;
756 } 763 }
764 memset(&cb->args[3], 0,
765 sizeof(cb->args) - 3*sizeof(cb->args[0]));
757 } 766 }
758 read_unlock(&fib_hash_lock); 767 read_unlock(&fib_hash_lock);
759 cb->args[2] = m; 768 cb->args[2] = m;
760 return skb->len; 769 return skb->len;
761} 770}
762 771
763#ifdef CONFIG_IP_MULTIPLE_TABLES 772void __init fib_hash_init(void)
764struct fib_table * fib_hash_init(u32 id)
765#else
766struct fib_table * __init fib_hash_init(u32 id)
767#endif
768{ 773{
769 struct fib_table *tb; 774 fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
775 0, SLAB_PANIC, NULL);
776
777 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
778 0, SLAB_PANIC, NULL);
770 779
771 if (fn_hash_kmem == NULL) 780}
772 fn_hash_kmem = kmem_cache_create("ip_fib_hash",
773 sizeof(struct fib_node),
774 0, SLAB_HWCACHE_ALIGN,
775 NULL);
776 781
777 if (fn_alias_kmem == NULL) 782struct fib_table *fib_hash_table(u32 id)
778 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 783{
779 sizeof(struct fib_alias), 784 struct fib_table *tb;
780 0, SLAB_HWCACHE_ALIGN,
781 NULL);
782 785
783 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), 786 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
784 GFP_KERNEL); 787 GFP_KERNEL);
@@ -786,6 +789,7 @@ struct fib_table * __init fib_hash_init(u32 id)
786 return NULL; 789 return NULL;
787 790
788 tb->tb_id = id; 791 tb->tb_id = id;
792 tb->tb_default = -1;
789 tb->tb_lookup = fn_hash_lookup; 793 tb->tb_lookup = fn_hash_lookup;
790 tb->tb_insert = fn_hash_insert; 794 tb->tb_insert = fn_hash_insert;
791 tb->tb_delete = fn_hash_delete; 795 tb->tb_delete = fn_hash_delete;
@@ -800,6 +804,7 @@ struct fib_table * __init fib_hash_init(u32 id)
800#ifdef CONFIG_PROC_FS 804#ifdef CONFIG_PROC_FS
801 805
802struct fib_iter_state { 806struct fib_iter_state {
807 struct seq_net_private p;
803 struct fn_zone *zone; 808 struct fn_zone *zone;
804 int bucket; 809 int bucket;
805 struct hlist_head *hash_head; 810 struct hlist_head *hash_head;
@@ -813,7 +818,11 @@ struct fib_iter_state {
813static struct fib_alias *fib_get_first(struct seq_file *seq) 818static struct fib_alias *fib_get_first(struct seq_file *seq)
814{ 819{
815 struct fib_iter_state *iter = seq->private; 820 struct fib_iter_state *iter = seq->private;
816 struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; 821 struct fib_table *main_table;
822 struct fn_hash *table;
823
824 main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN);
825 table = (struct fn_hash *)main_table->tb_data;
817 826
818 iter->bucket = 0; 827 iter->bucket = 0;
819 iter->hash_head = NULL; 828 iter->hash_head = NULL;
@@ -948,11 +957,13 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
948} 957}
949 958
950static void *fib_seq_start(struct seq_file *seq, loff_t *pos) 959static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
960 __acquires(fib_hash_lock)
951{ 961{
962 struct fib_iter_state *iter = seq->private;
952 void *v = NULL; 963 void *v = NULL;
953 964
954 read_lock(&fib_hash_lock); 965 read_lock(&fib_hash_lock);
955 if (ip_fib_main_table) 966 if (fib_get_table(iter->p.net, RT_TABLE_MAIN))
956 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 967 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
957 return v; 968 return v;
958} 969}
@@ -964,6 +975,7 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
964} 975}
965 976
966static void fib_seq_stop(struct seq_file *seq, void *v) 977static void fib_seq_stop(struct seq_file *seq, void *v)
978 __releases(fib_hash_lock)
967{ 979{
968 read_unlock(&fib_hash_lock); 980 read_unlock(&fib_hash_lock);
969} 981}
@@ -1039,8 +1051,8 @@ static const struct seq_operations fib_seq_ops = {
1039 1051
1040static int fib_seq_open(struct inode *inode, struct file *file) 1052static int fib_seq_open(struct inode *inode, struct file *file)
1041{ 1053{
1042 return seq_open_private(file, &fib_seq_ops, 1054 return seq_open_net(inode, file, &fib_seq_ops,
1043 sizeof(struct fib_iter_state)); 1055 sizeof(struct fib_iter_state));
1044} 1056}
1045 1057
1046static const struct file_operations fib_seq_fops = { 1058static const struct file_operations fib_seq_fops = {
@@ -1048,18 +1060,18 @@ static const struct file_operations fib_seq_fops = {
1048 .open = fib_seq_open, 1060 .open = fib_seq_open,
1049 .read = seq_read, 1061 .read = seq_read,
1050 .llseek = seq_lseek, 1062 .llseek = seq_lseek,
1051 .release = seq_release_private, 1063 .release = seq_release_net,
1052}; 1064};
1053 1065
1054int __init fib_proc_init(void) 1066int __net_init fib_proc_init(struct net *net)
1055{ 1067{
1056 if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_seq_fops)) 1068 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
1057 return -ENOMEM; 1069 return -ENOMEM;
1058 return 0; 1070 return 0;
1059} 1071}
1060 1072
1061void __init fib_proc_exit(void) 1073void __net_exit fib_proc_exit(struct net *net)
1062{ 1074{
1063 proc_net_remove(&init_net, "route"); 1075 proc_net_remove(net, "route");
1064} 1076}
1065#endif /* CONFIG_PROC_FS */ 1077#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index eef9eec17e0c..2c1623d2768b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -7,12 +7,14 @@
7 7
8struct fib_alias { 8struct fib_alias {
9 struct list_head fa_list; 9 struct list_head fa_list;
10 struct rcu_head rcu;
11 struct fib_info *fa_info; 10 struct fib_info *fa_info;
12 u8 fa_tos; 11 u8 fa_tos;
13 u8 fa_type; 12 u8 fa_type;
14 u8 fa_scope; 13 u8 fa_scope;
15 u8 fa_state; 14 u8 fa_state;
15#ifdef CONFIG_IP_FIB_TRIE
16 struct rcu_head rcu;
17#endif
16}; 18};
17 19
18#define FA_S_ACCESSED 0x01 20#define FA_S_ACCESSED 0x01
@@ -36,6 +38,16 @@ extern struct fib_alias *fib_find_alias(struct list_head *fah,
36 u8 tos, u32 prio); 38 u8 tos, u32 prio);
37extern int fib_detect_death(struct fib_info *fi, int order, 39extern int fib_detect_death(struct fib_info *fi, int order,
38 struct fib_info **last_resort, 40 struct fib_info **last_resort,
39 int *last_idx, int *dflt); 41 int *last_idx, int dflt);
42
43static inline void fib_result_assign(struct fib_result *res,
44 struct fib_info *fi)
45{
46 if (res->fi != NULL)
47 fib_info_put(res->fi);
48 res->fi = fi;
49 if (fi != NULL)
50 atomic_inc(&fi->fib_clntref);
51}
40 52
41#endif /* _FIB_LOOKUP_H */ 53#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f16839c6a721..19274d01afa4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,8 +32,6 @@
32#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h> 33#include <net/fib_rules.h>
34 34
35static struct fib_rules_ops fib4_rules_ops;
36
37struct fib4_rule 35struct fib4_rule
38{ 36{
39 struct fib_rule common; 37 struct fib_rule common;
@@ -49,33 +47,6 @@ struct fib4_rule
49#endif 47#endif
50}; 48};
51 49
52static struct fib4_rule default_rule = {
53 .common = {
54 .refcnt = ATOMIC_INIT(2),
55 .pref = 0x7FFF,
56 .table = RT_TABLE_DEFAULT,
57 .action = FR_ACT_TO_TBL,
58 },
59};
60
61static struct fib4_rule main_rule = {
62 .common = {
63 .refcnt = ATOMIC_INIT(2),
64 .pref = 0x7FFE,
65 .table = RT_TABLE_MAIN,
66 .action = FR_ACT_TO_TBL,
67 },
68};
69
70static struct fib4_rule local_rule = {
71 .common = {
72 .refcnt = ATOMIC_INIT(2),
73 .table = RT_TABLE_LOCAL,
74 .action = FR_ACT_TO_TBL,
75 .flags = FIB_RULE_PERMANENT,
76 },
77};
78
79#ifdef CONFIG_NET_CLS_ROUTE 50#ifdef CONFIG_NET_CLS_ROUTE
80u32 fib_rules_tclass(struct fib_result *res) 51u32 fib_rules_tclass(struct fib_result *res)
81{ 52{
@@ -83,14 +54,14 @@ u32 fib_rules_tclass(struct fib_result *res)
83} 54}
84#endif 55#endif
85 56
86int fib_lookup(struct flowi *flp, struct fib_result *res) 57int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
87{ 58{
88 struct fib_lookup_arg arg = { 59 struct fib_lookup_arg arg = {
89 .result = res, 60 .result = res,
90 }; 61 };
91 int err; 62 int err;
92 63
93 err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); 64 err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg);
94 res->r = arg.rule; 65 res->r = arg.rule;
95 66
96 return err; 67 return err;
@@ -120,7 +91,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
120 goto errout; 91 goto errout;
121 } 92 }
122 93
123 if ((tbl = fib_get_table(rule->table)) == NULL) 94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
124 goto errout; 95 goto errout;
125 96
126 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); 97 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
@@ -131,16 +102,6 @@ errout:
131} 102}
132 103
133 104
134void fib_select_default(const struct flowi *flp, struct fib_result *res)
135{
136 if (res->r && res->r->action == FR_ACT_TO_TBL &&
137 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
138 struct fib_table *tb;
139 if ((tb = fib_get_table(res->r->table)) != NULL)
140 tb->tb_select_default(tb, flp, res);
141 }
142}
143
144static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 105static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
145{ 106{
146 struct fib4_rule *r = (struct fib4_rule *) rule; 107 struct fib4_rule *r = (struct fib4_rule *) rule;
@@ -157,13 +118,13 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
157 return 1; 118 return 1;
158} 119}
159 120
160static struct fib_table *fib_empty_table(void) 121static struct fib_table *fib_empty_table(struct net *net)
161{ 122{
162 u32 id; 123 u32 id;
163 124
164 for (id = 1; id <= RT_TABLE_MAX; id++) 125 for (id = 1; id <= RT_TABLE_MAX; id++)
165 if (fib_get_table(id) == NULL) 126 if (fib_get_table(net, id) == NULL)
166 return fib_new_table(id); 127 return fib_new_table(net, id);
167 return NULL; 128 return NULL;
168} 129}
169 130
@@ -176,6 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
176 struct nlmsghdr *nlh, struct fib_rule_hdr *frh, 137 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
177 struct nlattr **tb) 138 struct nlattr **tb)
178{ 139{
140 struct net *net = skb->sk->sk_net;
179 int err = -EINVAL; 141 int err = -EINVAL;
180 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 142 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
181 143
@@ -186,7 +148,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 if (rule->action == FR_ACT_TO_TBL) { 148 if (rule->action == FR_ACT_TO_TBL) {
187 struct fib_table *table; 149 struct fib_table *table;
188 150
189 table = fib_empty_table(); 151 table = fib_empty_table(net);
190 if (table == NULL) { 152 if (table == NULL) {
191 err = -ENOBUFS; 153 err = -ENOBUFS;
192 goto errout; 154 goto errout;
@@ -272,14 +234,14 @@ nla_put_failure:
272 return -ENOBUFS; 234 return -ENOBUFS;
273} 235}
274 236
275static u32 fib4_rule_default_pref(void) 237static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
276{ 238{
277 struct list_head *pos; 239 struct list_head *pos;
278 struct fib_rule *rule; 240 struct fib_rule *rule;
279 241
280 if (!list_empty(&fib4_rules_ops.rules_list)) { 242 if (!list_empty(&ops->rules_list)) {
281 pos = fib4_rules_ops.rules_list.next; 243 pos = ops->rules_list.next;
282 if (pos->next != &fib4_rules_ops.rules_list) { 244 if (pos->next != &ops->rules_list) {
283 rule = list_entry(pos->next, struct fib_rule, list); 245 rule = list_entry(pos->next, struct fib_rule, list);
284 if (rule->pref) 246 if (rule->pref)
285 return rule->pref - 1; 247 return rule->pref - 1;
@@ -301,7 +263,7 @@ static void fib4_rule_flush_cache(void)
301 rt_cache_flush(-1); 263 rt_cache_flush(-1);
302} 264}
303 265
304static struct fib_rules_ops fib4_rules_ops = { 266static struct fib_rules_ops fib4_rules_ops_template = {
305 .family = AF_INET, 267 .family = AF_INET,
306 .rule_size = sizeof(struct fib4_rule), 268 .rule_size = sizeof(struct fib4_rule),
307 .addr_size = sizeof(u32), 269 .addr_size = sizeof(u32),
@@ -315,15 +277,53 @@ static struct fib_rules_ops fib4_rules_ops = {
315 .flush_cache = fib4_rule_flush_cache, 277 .flush_cache = fib4_rule_flush_cache,
316 .nlgroup = RTNLGRP_IPV4_RULE, 278 .nlgroup = RTNLGRP_IPV4_RULE,
317 .policy = fib4_rule_policy, 279 .policy = fib4_rule_policy,
318 .rules_list = LIST_HEAD_INIT(fib4_rules_ops.rules_list),
319 .owner = THIS_MODULE, 280 .owner = THIS_MODULE,
320}; 281};
321 282
322void __init fib4_rules_init(void) 283static int fib_default_rules_init(struct fib_rules_ops *ops)
284{
285 int err;
286
287 err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
288 if (err < 0)
289 return err;
290 err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
291 if (err < 0)
292 return err;
293 err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
294 if (err < 0)
295 return err;
296 return 0;
297}
298
299int __net_init fib4_rules_init(struct net *net)
323{ 300{
324 list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list); 301 int err;
325 list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list); 302 struct fib_rules_ops *ops;
326 list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list); 303
304 ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL);
305 if (ops == NULL)
306 return -ENOMEM;
307 INIT_LIST_HEAD(&ops->rules_list);
308 ops->fro_net = net;
309
310 fib_rules_register(ops);
327 311
328 fib_rules_register(&fib4_rules_ops); 312 err = fib_default_rules_init(ops);
313 if (err < 0)
314 goto fail;
315 net->ipv4.rules_ops = ops;
316 return 0;
317
318fail:
319 /* also cleans all rules already added */
320 fib_rules_unregister(ops);
321 kfree(ops);
322 return err;
323}
324
325void __net_exit fib4_rules_exit(struct net *net)
326{
327 fib_rules_unregister(net->ipv4.rules_ops);
328 kfree(net->ipv4.rules_ops);
329} 329}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1351a2617dce..a13c84763d4c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -47,8 +47,6 @@
47 47
48#include "fib_lookup.h" 48#include "fib_lookup.h"
49 49
50#define FSprintk(a...)
51
52static DEFINE_SPINLOCK(fib_info_lock); 50static DEFINE_SPINLOCK(fib_info_lock);
53static struct hlist_head *fib_info_hash; 51static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash; 52static struct hlist_head *fib_info_laddrhash;
@@ -145,7 +143,7 @@ static const struct
145void free_fib_info(struct fib_info *fi) 143void free_fib_info(struct fib_info *fi)
146{ 144{
147 if (fi->fib_dead == 0) { 145 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi); 146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149 return; 147 return;
150 } 148 }
151 change_nexthops(fi) { 149 change_nexthops(fi) {
@@ -196,6 +194,15 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
196 return 0; 194 return 0;
197} 195}
198 196
197static inline unsigned int fib_devindex_hashfn(unsigned int val)
198{
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204}
205
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 206static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{ 207{
201 unsigned int mask = (fib_hash_size - 1); 208 unsigned int mask = (fib_hash_size - 1);
@@ -204,6 +211,9 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
204 val ^= fi->fib_protocol; 211 val ^= fi->fib_protocol;
205 val ^= (__force u32)fi->fib_prefsrc; 212 val ^= (__force u32)fi->fib_prefsrc;
206 val ^= fi->fib_priority; 213 val ^= fi->fib_priority;
214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
207 217
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209} 219}
@@ -219,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
219 head = &fib_info_hash[hash]; 229 head = &fib_info_hash[hash];
220 230
221 hlist_for_each_entry(fi, node, head, fib_hash) { 231 hlist_for_each_entry(fi, node, head, fib_hash) {
232 if (fi->fib_net != nfi->fib_net)
233 continue;
222 if (fi->fib_nhs != nfi->fib_nhs) 234 if (fi->fib_nhs != nfi->fib_nhs)
223 continue; 235 continue;
224 if (nfi->fib_protocol == fi->fib_protocol && 236 if (nfi->fib_protocol == fi->fib_protocol &&
@@ -234,15 +246,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
234 return NULL; 246 return NULL;
235} 247}
236 248
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured. 249/* Check, that the gateway is already configured.
247 Used only by redirect accept routine. 250 Used only by redirect accept routine.
248 */ 251 */
@@ -320,11 +323,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
320 kfree_skb(skb); 323 kfree_skb(skb);
321 goto errout; 324 goto errout;
322 } 325 }
323 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, 326 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
324 info->nlh, GFP_KERNEL); 327 info->nlh, GFP_KERNEL);
325errout: 328errout:
326 if (err < 0) 329 if (err < 0)
327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); 330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
328} 331}
329 332
330/* Return the first fib alias matching TOS with 333/* Return the first fib alias matching TOS with
@@ -346,7 +349,7 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
346} 349}
347 350
348int fib_detect_death(struct fib_info *fi, int order, 351int fib_detect_death(struct fib_info *fi, int order,
349 struct fib_info **last_resort, int *last_idx, int *dflt) 352 struct fib_info **last_resort, int *last_idx, int dflt)
350{ 353{
351 struct neighbour *n; 354 struct neighbour *n;
352 int state = NUD_NONE; 355 int state = NUD_NONE;
@@ -358,10 +361,10 @@ int fib_detect_death(struct fib_info *fi, int order,
358 } 361 }
359 if (state==NUD_REACHABLE) 362 if (state==NUD_REACHABLE)
360 return 0; 363 return 0;
361 if ((state&NUD_VALID) && order != *dflt) 364 if ((state&NUD_VALID) && order != dflt)
362 return 0; 365 return 0;
363 if ((state&NUD_VALID) || 366 if ((state&NUD_VALID) ||
364 (*last_idx<0 && order > *dflt)) { 367 (*last_idx<0 && order > dflt)) {
365 *last_resort = fi; 368 *last_resort = fi;
366 *last_idx = order; 369 *last_idx = order;
367 } 370 }
@@ -518,7 +521,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh) 521 struct fib_nh *nh)
519{ 522{
520 int err; 523 int err;
524 struct net *net;
521 525
526 net = cfg->fc_nlinfo.nl_net;
522 if (nh->nh_gw) { 527 if (nh->nh_gw) {
523 struct fib_result res; 528 struct fib_result res;
524 529
@@ -531,9 +536,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
531 536
532 if (cfg->fc_scope >= RT_SCOPE_LINK) 537 if (cfg->fc_scope >= RT_SCOPE_LINK)
533 return -EINVAL; 538 return -EINVAL;
534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 539 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
535 return -EINVAL; 540 return -EINVAL;
536 if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) 541 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
537 return -ENODEV; 542 return -ENODEV;
538 if (!(dev->flags&IFF_UP)) 543 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN; 544 return -ENETDOWN;
@@ -556,7 +561,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
556 /* It is not necessary, but requires a bit of thinking */ 561 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK) 562 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK; 563 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0) 564 if ((err = fib_lookup(net, &fl, &res)) != 0)
560 return err; 565 return err;
561 } 566 }
562 err = -EINVAL; 567 err = -EINVAL;
@@ -580,7 +585,7 @@ out:
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 585 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL; 586 return -EINVAL;
582 587
583 in_dev = inetdev_by_index(nh->nh_oif); 588 in_dev = inetdev_by_index(net, nh->nh_oif);
584 if (in_dev == NULL) 589 if (in_dev == NULL)
585 return -ENODEV; 590 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) { 591 if (!(in_dev->dev->flags&IFF_UP)) {
@@ -605,10 +610,10 @@ static inline unsigned int fib_laddr_hashfn(__be32 val)
605static struct hlist_head *fib_hash_alloc(int bytes) 610static struct hlist_head *fib_hash_alloc(int bytes)
606{ 611{
607 if (bytes <= PAGE_SIZE) 612 if (bytes <= PAGE_SIZE)
608 return kmalloc(bytes, GFP_KERNEL); 613 return kzalloc(bytes, GFP_KERNEL);
609 else 614 else
610 return (struct hlist_head *) 615 return (struct hlist_head *)
611 __get_free_pages(GFP_KERNEL, get_order(bytes)); 616 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
612} 617}
613 618
614static void fib_hash_free(struct hlist_head *hash, int bytes) 619static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -684,6 +689,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
684 struct fib_info *fi = NULL; 689 struct fib_info *fi = NULL;
685 struct fib_info *ofi; 690 struct fib_info *ofi;
686 int nhs = 1; 691 int nhs = 1;
692 struct net *net = cfg->fc_nlinfo.nl_net;
687 693
688 /* Fast check to catch the most weird cases */ 694 /* Fast check to catch the most weird cases */
689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 695 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
@@ -712,12 +718,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
712 if (!new_info_hash || !new_laddrhash) { 718 if (!new_info_hash || !new_laddrhash) {
713 fib_hash_free(new_info_hash, bytes); 719 fib_hash_free(new_info_hash, bytes);
714 fib_hash_free(new_laddrhash, bytes); 720 fib_hash_free(new_laddrhash, bytes);
715 } else { 721 } else
716 memset(new_info_hash, 0, bytes);
717 memset(new_laddrhash, 0, bytes);
718
719 fib_hash_move(new_info_hash, new_laddrhash, new_size); 722 fib_hash_move(new_info_hash, new_laddrhash, new_size);
720 }
721 723
722 if (!fib_hash_size) 724 if (!fib_hash_size)
723 goto failure; 725 goto failure;
@@ -728,6 +730,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
728 goto failure; 730 goto failure;
729 fib_info_cnt++; 731 fib_info_cnt++;
730 732
733 fi->fib_net = net;
731 fi->fib_protocol = cfg->fc_protocol; 734 fi->fib_protocol = cfg->fc_protocol;
732 fi->fib_flags = cfg->fc_flags; 735 fi->fib_flags = cfg->fc_flags;
733 fi->fib_priority = cfg->fc_priority; 736 fi->fib_priority = cfg->fc_priority;
@@ -799,7 +802,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
799 if (nhs != 1 || nh->nh_gw) 802 if (nhs != 1 || nh->nh_gw)
800 goto err_inval; 803 goto err_inval;
801 nh->nh_scope = RT_SCOPE_NOWHERE; 804 nh->nh_scope = RT_SCOPE_NOWHERE;
802 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); 805 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
803 err = -ENODEV; 806 err = -ENODEV;
804 if (nh->nh_dev == NULL) 807 if (nh->nh_dev == NULL)
805 goto failure; 808 goto failure;
@@ -813,7 +816,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
813 if (fi->fib_prefsrc) { 816 if (fi->fib_prefsrc) {
814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 817 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst) 818 fi->fib_prefsrc != cfg->fc_dst)
816 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 819 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
817 goto err_inval; 820 goto err_inval;
818 } 821 }
819 822
@@ -914,7 +917,8 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
914 continue; 917 continue;
915 918
916 default: 919 default:
917 printk(KERN_DEBUG "impossible 102\n"); 920 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
921 fa->fa_type);
918 return -EINVAL; 922 return -EINVAL;
919 } 923 }
920 } 924 }
@@ -1029,70 +1033,74 @@ nla_put_failure:
1029 referring to it. 1033 referring to it.
1030 - device went down -> we must shutdown all nexthops going via it. 1034 - device went down -> we must shutdown all nexthops going via it.
1031 */ 1035 */
1032 1036int fib_sync_down_addr(struct net *net, __be32 local)
1033int fib_sync_down(__be32 local, struct net_device *dev, int force)
1034{ 1037{
1035 int ret = 0; 1038 int ret = 0;
1036 int scope = RT_SCOPE_NOWHERE; 1039 unsigned int hash = fib_laddr_hashfn(local);
1037 1040 struct hlist_head *head = &fib_info_laddrhash[hash];
1038 if (force) 1041 struct hlist_node *node;
1039 scope = -1; 1042 struct fib_info *fi;
1040 1043
1041 if (local && fib_info_laddrhash) { 1044 if (fib_info_laddrhash == NULL || local == 0)
1042 unsigned int hash = fib_laddr_hashfn(local); 1045 return 0;
1043 struct hlist_head *head = &fib_info_laddrhash[hash];
1044 struct hlist_node *node;
1045 struct fib_info *fi;
1046 1046
1047 hlist_for_each_entry(fi, node, head, fib_lhash) { 1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
1048 if (fi->fib_prefsrc == local) { 1048 if (fi->fib_net != net)
1049 fi->fib_flags |= RTNH_F_DEAD; 1049 continue;
1050 ret++; 1050 if (fi->fib_prefsrc == local) {
1051 } 1051 fi->fib_flags |= RTNH_F_DEAD;
1052 ret++;
1052 } 1053 }
1053 } 1054 }
1055 return ret;
1056}
1054 1057
1055 if (dev) { 1058int fib_sync_down_dev(struct net_device *dev, int force)
1056 struct fib_info *prev_fi = NULL; 1059{
1057 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1060 int ret = 0;
1058 struct hlist_head *head = &fib_info_devhash[hash]; 1061 int scope = RT_SCOPE_NOWHERE;
1059 struct hlist_node *node; 1062 struct fib_info *prev_fi = NULL;
1060 struct fib_nh *nh; 1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064 struct hlist_head *head = &fib_info_devhash[hash];
1065 struct hlist_node *node;
1066 struct fib_nh *nh;
1061 1067
1062 hlist_for_each_entry(nh, node, head, nh_hash) { 1068 if (force)
1063 struct fib_info *fi = nh->nh_parent; 1069 scope = -1;
1064 int dead;
1065 1070
1066 BUG_ON(!fi->fib_nhs); 1071 hlist_for_each_entry(nh, node, head, nh_hash) {
1067 if (nh->nh_dev != dev || fi == prev_fi) 1072 struct fib_info *fi = nh->nh_parent;
1068 continue; 1073 int dead;
1069 prev_fi = fi; 1074
1070 dead = 0; 1075 BUG_ON(!fi->fib_nhs);
1071 change_nexthops(fi) { 1076 if (nh->nh_dev != dev || fi == prev_fi)
1072 if (nh->nh_flags&RTNH_F_DEAD) 1077 continue;
1073 dead++; 1078 prev_fi = fi;
1074 else if (nh->nh_dev == dev && 1079 dead = 0;
1075 nh->nh_scope != scope) { 1080 change_nexthops(fi) {
1076 nh->nh_flags |= RTNH_F_DEAD; 1081 if (nh->nh_flags&RTNH_F_DEAD)
1082 dead++;
1083 else if (nh->nh_dev == dev &&
1084 nh->nh_scope != scope) {
1085 nh->nh_flags |= RTNH_F_DEAD;
1077#ifdef CONFIG_IP_ROUTE_MULTIPATH 1086#ifdef CONFIG_IP_ROUTE_MULTIPATH
1078 spin_lock_bh(&fib_multipath_lock); 1087 spin_lock_bh(&fib_multipath_lock);
1079 fi->fib_power -= nh->nh_power; 1088 fi->fib_power -= nh->nh_power;
1080 nh->nh_power = 0; 1089 nh->nh_power = 0;
1081 spin_unlock_bh(&fib_multipath_lock); 1090 spin_unlock_bh(&fib_multipath_lock);
1082#endif 1091#endif
1083 dead++; 1092 dead++;
1084 } 1093 }
1085#ifdef CONFIG_IP_ROUTE_MULTIPATH 1094#ifdef CONFIG_IP_ROUTE_MULTIPATH
1086 if (force > 1 && nh->nh_dev == dev) { 1095 if (force > 1 && nh->nh_dev == dev) {
1087 dead = fi->fib_nhs; 1096 dead = fi->fib_nhs;
1088 break; 1097 break;
1089 }
1090#endif
1091 } endfor_nexthops(fi)
1092 if (dead == fi->fib_nhs) {
1093 fi->fib_flags |= RTNH_F_DEAD;
1094 ret++;
1095 } 1098 }
1099#endif
1100 } endfor_nexthops(fi)
1101 if (dead == fi->fib_nhs) {
1102 fi->fib_flags |= RTNH_F_DEAD;
1103 ret++;
1096 } 1104 }
1097 } 1105 }
1098 1106
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8d8c2915e064..35851c96bdfb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,6 @@
82#include <net/ip_fib.h> 82#include <net/ip_fib.h>
83#include "fib_lookup.h" 83#include "fib_lookup.h"
84 84
85#undef CONFIG_IP_FIB_TRIE_STATS
86#define MAX_STAT_DEPTH 32 85#define MAX_STAT_DEPTH 32
87 86
88#define KEYLENGTH (8*sizeof(t_key)) 87#define KEYLENGTH (8*sizeof(t_key))
@@ -98,13 +97,13 @@ typedef unsigned int t_key;
98#define IS_LEAF(n) (n->parent & T_LEAF) 97#define IS_LEAF(n) (n->parent & T_LEAF)
99 98
100struct node { 99struct node {
101 t_key key;
102 unsigned long parent; 100 unsigned long parent;
101 t_key key;
103}; 102};
104 103
105struct leaf { 104struct leaf {
106 t_key key;
107 unsigned long parent; 105 unsigned long parent;
106 t_key key;
108 struct hlist_head list; 107 struct hlist_head list;
109 struct rcu_head rcu; 108 struct rcu_head rcu;
110}; 109};
@@ -117,12 +116,12 @@ struct leaf_info {
117}; 116};
118 117
119struct tnode { 118struct tnode {
120 t_key key;
121 unsigned long parent; 119 unsigned long parent;
122 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 120 t_key key;
123 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
124 unsigned short full_children; /* KEYLENGTH bits needed */ 122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
125 unsigned short empty_children; /* KEYLENGTH bits needed */ 123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
126 struct rcu_head rcu; 125 struct rcu_head rcu;
127 struct node *child[0]; 126 struct node *child[0];
128}; 127};
@@ -144,6 +143,7 @@ struct trie_stat {
144 unsigned int tnodes; 143 unsigned int tnodes;
145 unsigned int leaves; 144 unsigned int leaves;
146 unsigned int nullpointers; 145 unsigned int nullpointers;
146 unsigned int prefixes;
147 unsigned int nodesizes[MAX_STAT_DEPTH]; 147 unsigned int nodesizes[MAX_STAT_DEPTH];
148}; 148};
149 149
@@ -152,25 +152,28 @@ struct trie {
152#ifdef CONFIG_IP_FIB_TRIE_STATS 152#ifdef CONFIG_IP_FIB_TRIE_STATS
153 struct trie_use_stats stats; 153 struct trie_use_stats stats;
154#endif 154#endif
155 int size;
156 unsigned int revision;
157}; 155};
158 156
159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 157static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 158static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
159 int wasfull);
161static struct node *resize(struct trie *t, struct tnode *tn); 160static struct node *resize(struct trie *t, struct tnode *tn);
162static struct tnode *inflate(struct trie *t, struct tnode *tn); 161static struct tnode *inflate(struct trie *t, struct tnode *tn);
163static struct tnode *halve(struct trie *t, struct tnode *tn); 162static struct tnode *halve(struct trie *t, struct tnode *tn);
164static void tnode_free(struct tnode *tn); 163static void tnode_free(struct tnode *tn);
165 164
166static struct kmem_cache *fn_alias_kmem __read_mostly; 165static struct kmem_cache *fn_alias_kmem __read_mostly;
167static struct trie *trie_local = NULL, *trie_main = NULL; 166static struct kmem_cache *trie_leaf_kmem __read_mostly;
168 167
169static inline struct tnode *node_parent(struct node *node) 168static inline struct tnode *node_parent(struct node *node)
170{ 169{
171 struct tnode *ret; 170 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
171}
172
173static inline struct tnode *node_parent_rcu(struct node *node)
174{
175 struct tnode *ret = node_parent(node);
172 176
173 ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174 return rcu_dereference(ret); 177 return rcu_dereference(ret);
175} 178}
176 179
@@ -180,13 +183,18 @@ static inline void node_set_parent(struct node *node, struct tnode *ptr)
180 (unsigned long)ptr | NODE_TYPE(node)); 183 (unsigned long)ptr | NODE_TYPE(node));
181} 184}
182 185
183/* rcu_read_lock needs to be hold by caller from readside */ 186static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
187{
188 BUG_ON(i >= 1U << tn->bits);
184 189
185static inline struct node *tnode_get_child(struct tnode *tn, int i) 190 return tn->child[i];
191}
192
193static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
186{ 194{
187 BUG_ON(i >= 1 << tn->bits); 195 struct node *ret = tnode_get_child(tn, i);
188 196
189 return rcu_dereference(tn->child[i]); 197 return rcu_dereference(ret);
190} 198}
191 199
192static inline int tnode_child_length(const struct tnode *tn) 200static inline int tnode_child_length(const struct tnode *tn)
@@ -300,10 +308,10 @@ static inline void check_tnode(const struct tnode *tn)
300 WARN_ON(tn && tn->pos+tn->bits > 32); 308 WARN_ON(tn && tn->pos+tn->bits > 32);
301} 309}
302 310
303static int halve_threshold = 25; 311static const int halve_threshold = 25;
304static int inflate_threshold = 50; 312static const int inflate_threshold = 50;
305static int halve_threshold_root = 8; 313static const int halve_threshold_root = 8;
306static int inflate_threshold_root = 15; 314static const int inflate_threshold_root = 15;
307 315
308 316
309static void __alias_free_mem(struct rcu_head *head) 317static void __alias_free_mem(struct rcu_head *head)
@@ -319,7 +327,8 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa)
319 327
320static void __leaf_free_rcu(struct rcu_head *head) 328static void __leaf_free_rcu(struct rcu_head *head)
321{ 329{
322 kfree(container_of(head, struct leaf, rcu)); 330 struct leaf *l = container_of(head, struct leaf, rcu);
331 kmem_cache_free(trie_leaf_kmem, l);
323} 332}
324 333
325static void __leaf_info_free_rcu(struct rcu_head *head) 334static void __leaf_info_free_rcu(struct rcu_head *head)
@@ -332,12 +341,12 @@ static inline void free_leaf_info(struct leaf_info *leaf)
332 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 341 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
333} 342}
334 343
335static struct tnode *tnode_alloc(unsigned int size) 344static struct tnode *tnode_alloc(size_t size)
336{ 345{
337 struct page *pages; 346 struct page *pages;
338 347
339 if (size <= PAGE_SIZE) 348 if (size <= PAGE_SIZE)
340 return kcalloc(size, 1, GFP_KERNEL); 349 return kzalloc(size, GFP_KERNEL);
341 350
342 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 351 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
343 if (!pages) 352 if (!pages)
@@ -349,8 +358,8 @@ static struct tnode *tnode_alloc(unsigned int size)
349static void __tnode_free_rcu(struct rcu_head *head) 358static void __tnode_free_rcu(struct rcu_head *head)
350{ 359{
351 struct tnode *tn = container_of(head, struct tnode, rcu); 360 struct tnode *tn = container_of(head, struct tnode, rcu);
352 unsigned int size = sizeof(struct tnode) + 361 size_t size = sizeof(struct tnode) +
353 (1 << tn->bits) * sizeof(struct node *); 362 (sizeof(struct node *) << tn->bits);
354 363
355 if (size <= PAGE_SIZE) 364 if (size <= PAGE_SIZE)
356 kfree(tn); 365 kfree(tn);
@@ -369,7 +378,7 @@ static inline void tnode_free(struct tnode *tn)
369 378
370static struct leaf *leaf_new(void) 379static struct leaf *leaf_new(void)
371{ 380{
372 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 381 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
373 if (l) { 382 if (l) {
374 l->parent = T_LEAF; 383 l->parent = T_LEAF;
375 INIT_HLIST_HEAD(&l->list); 384 INIT_HLIST_HEAD(&l->list);
@@ -387,14 +396,12 @@ static struct leaf_info *leaf_info_new(int plen)
387 return li; 396 return li;
388} 397}
389 398
390static struct tnode* tnode_new(t_key key, int pos, int bits) 399static struct tnode *tnode_new(t_key key, int pos, int bits)
391{ 400{
392 int nchildren = 1<<bits; 401 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
393 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
394 struct tnode *tn = tnode_alloc(sz); 402 struct tnode *tn = tnode_alloc(sz);
395 403
396 if (tn) { 404 if (tn) {
397 memset(tn, 0, sz);
398 tn->parent = T_TNODE; 405 tn->parent = T_TNODE;
399 tn->pos = pos; 406 tn->pos = pos;
400 tn->bits = bits; 407 tn->bits = bits;
@@ -403,8 +410,8 @@ static struct tnode* tnode_new(t_key key, int pos, int bits)
403 tn->empty_children = 1<<bits; 410 tn->empty_children = 1<<bits;
404 } 411 }
405 412
406 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), 413 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
407 (unsigned int) (sizeof(struct node) * 1<<bits)); 414 (unsigned long) (sizeof(struct node) << bits));
408 return tn; 415 return tn;
409} 416}
410 417
@@ -421,7 +428,8 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
421 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 428 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
422} 429}
423 430
424static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 431static inline void put_child(struct trie *t, struct tnode *tn, int i,
432 struct node *n)
425{ 433{
426 tnode_put_child_reorg(tn, i, n, -1); 434 tnode_put_child_reorg(tn, i, n, -1);
427} 435}
@@ -431,14 +439,14 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod
431 * Update the value of full_children and empty_children. 439 * Update the value of full_children and empty_children.
432 */ 440 */
433 441
434static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 442static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
443 int wasfull)
435{ 444{
436 struct node *chi = tn->child[i]; 445 struct node *chi = tn->child[i];
437 int isfull; 446 int isfull;
438 447
439 BUG_ON(i >= 1<<tn->bits); 448 BUG_ON(i >= 1<<tn->bits);
440 449
441
442 /* update emptyChildren */ 450 /* update emptyChildren */
443 if (n == NULL && chi != NULL) 451 if (n == NULL && chi != NULL)
444 tn->empty_children++; 452 tn->empty_children++;
@@ -571,11 +579,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
571 err = 0; 579 err = 0;
572 max_resize = 10; 580 max_resize = 10;
573 while ((tn->full_children > 0 && max_resize-- && 581 while ((tn->full_children > 0 && max_resize-- &&
574 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 582 50 * (tn->full_children + tnode_child_length(tn)
575 inflate_threshold_use * tnode_child_length(tn))) { 583 - tn->empty_children)
584 >= inflate_threshold_use * tnode_child_length(tn))) {
576 585
577 old_tn = tn; 586 old_tn = tn;
578 tn = inflate(t, tn); 587 tn = inflate(t, tn);
588
579 if (IS_ERR(tn)) { 589 if (IS_ERR(tn)) {
580 tn = old_tn; 590 tn = old_tn;
581#ifdef CONFIG_IP_FIB_TRIE_STATS 591#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -587,11 +597,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
587 597
588 if (max_resize < 0) { 598 if (max_resize < 0) {
589 if (!tn->parent) 599 if (!tn->parent)
590 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n", 600 pr_warning("Fix inflate_threshold_root."
591 inflate_threshold_root, tn->bits); 601 " Now=%d size=%d bits\n",
602 inflate_threshold_root, tn->bits);
592 else 603 else
593 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n", 604 pr_warning("Fix inflate_threshold."
594 inflate_threshold, tn->bits); 605 " Now=%d size=%d bits\n",
606 inflate_threshold, tn->bits);
595 } 607 }
596 608
597 check_tnode(tn); 609 check_tnode(tn);
@@ -628,11 +640,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
628 640
629 if (max_resize < 0) { 641 if (max_resize < 0) {
630 if (!tn->parent) 642 if (!tn->parent)
631 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n", 643 pr_warning("Fix halve_threshold_root."
632 halve_threshold_root, tn->bits); 644 " Now=%d size=%d bits\n",
645 halve_threshold_root, tn->bits);
633 else 646 else
634 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n", 647 pr_warning("Fix halve_threshold."
635 halve_threshold, tn->bits); 648 " Now=%d size=%d bits\n",
649 halve_threshold, tn->bits);
636 } 650 }
637 651
638 /* Only one child remains */ 652 /* Only one child remains */
@@ -656,7 +670,6 @@ static struct node *resize(struct trie *t, struct tnode *tn)
656 670
657static struct tnode *inflate(struct trie *t, struct tnode *tn) 671static struct tnode *inflate(struct trie *t, struct tnode *tn)
658{ 672{
659 struct tnode *inode;
660 struct tnode *oldtnode = tn; 673 struct tnode *oldtnode = tn;
661 int olen = tnode_child_length(tn); 674 int olen = tnode_child_length(tn);
662 int i; 675 int i;
@@ -676,8 +689,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
676 */ 689 */
677 690
678 for (i = 0; i < olen; i++) { 691 for (i = 0; i < olen; i++) {
679 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); 692 struct tnode *inode;
680 693
694 inode = (struct tnode *) tnode_get_child(oldtnode, i);
681 if (inode && 695 if (inode &&
682 IS_TNODE(inode) && 696 IS_TNODE(inode) &&
683 inode->pos == oldtnode->pos + oldtnode->bits && 697 inode->pos == oldtnode->pos + oldtnode->bits &&
@@ -704,6 +718,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
704 } 718 }
705 719
706 for (i = 0; i < olen; i++) { 720 for (i = 0; i < olen; i++) {
721 struct tnode *inode;
707 struct node *node = tnode_get_child(oldtnode, i); 722 struct node *node = tnode_get_child(oldtnode, i);
708 struct tnode *left, *right; 723 struct tnode *left, *right;
709 int size, j; 724 int size, j;
@@ -716,8 +731,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
716 731
717 if (IS_LEAF(node) || ((struct tnode *) node)->pos > 732 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
718 tn->pos + tn->bits - 1) { 733 tn->pos + tn->bits - 1) {
719 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 734 if (tkey_extract_bits(node->key,
720 1) == 0) 735 oldtnode->pos + oldtnode->bits,
736 1) == 0)
721 put_child(t, tn, 2*i, node); 737 put_child(t, tn, 2*i, node);
722 else 738 else
723 put_child(t, tn, 2*i+1, node); 739 put_child(t, tn, 2*i+1, node);
@@ -877,19 +893,6 @@ nomem:
877 } 893 }
878} 894}
879 895
880static void trie_init(struct trie *t)
881{
882 if (!t)
883 return;
884
885 t->size = 0;
886 rcu_assign_pointer(t->trie, NULL);
887 t->revision = 0;
888#ifdef CONFIG_IP_FIB_TRIE_STATS
889 memset(&t->stats, 0, sizeof(struct trie_use_stats));
890#endif
891}
892
893/* readside must use rcu_read_lock currently dump routines 896/* readside must use rcu_read_lock currently dump routines
894 via get_fa_head and dump */ 897 via get_fa_head and dump */
895 898
@@ -906,7 +909,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
906 return NULL; 909 return NULL;
907} 910}
908 911
909static inline struct list_head * get_fa_head(struct leaf *l, int plen) 912static inline struct list_head *get_fa_head(struct leaf *l, int plen)
910{ 913{
911 struct leaf_info *li = find_leaf_info(l, plen); 914 struct leaf_info *li = find_leaf_info(l, plen);
912 915
@@ -956,7 +959,10 @@ fib_find_node(struct trie *t, u32 key)
956 959
957 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 960 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
958 pos = tn->pos + tn->bits; 961 pos = tn->pos + tn->bits;
959 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 962 n = tnode_get_child_rcu(tn,
963 tkey_extract_bits(key,
964 tn->pos,
965 tn->bits));
960 } else 966 } else
961 break; 967 break;
962 } 968 }
@@ -977,8 +983,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
977 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 983 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
978 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 984 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
979 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 985 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
980 tn = (struct tnode *) resize (t, (struct tnode *)tn); 986 tn = (struct tnode *) resize(t, (struct tnode *)tn);
981 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 987
988 tnode_put_child_reorg((struct tnode *)tp, cindex,
989 (struct node *)tn, wasfull);
982 990
983 tp = node_parent((struct node *) tn); 991 tp = node_parent((struct node *) tn);
984 if (!tp) 992 if (!tp)
@@ -988,15 +996,14 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
988 996
989 /* Handle last (top) tnode */ 997 /* Handle last (top) tnode */
990 if (IS_TNODE(tn)) 998 if (IS_TNODE(tn))
991 tn = (struct tnode*) resize(t, (struct tnode *)tn); 999 tn = (struct tnode *)resize(t, (struct tnode *)tn);
992 1000
993 return (struct node*) tn; 1001 return (struct node *)tn;
994} 1002}
995 1003
996/* only used from updater-side */ 1004/* only used from updater-side */
997 1005
998static struct list_head * 1006static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
999fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1000{ 1007{
1001 int pos, newpos; 1008 int pos, newpos;
1002 struct tnode *tp = NULL, *tn = NULL; 1009 struct tnode *tp = NULL, *tn = NULL;
@@ -1036,7 +1043,10 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1036 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 1043 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1037 tp = tn; 1044 tp = tn;
1038 pos = tn->pos + tn->bits; 1045 pos = tn->pos + tn->bits;
1039 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 1046 n = tnode_get_child(tn,
1047 tkey_extract_bits(key,
1048 tn->pos,
1049 tn->bits));
1040 1050
1041 BUG_ON(n && node_parent(n) != tn); 1051 BUG_ON(n && node_parent(n) != tn);
1042 } else 1052 } else
@@ -1054,34 +1064,27 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1054 /* Case 1: n is a leaf. Compare prefixes */ 1064 /* Case 1: n is a leaf. Compare prefixes */
1055 1065
1056 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1066 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1057 struct leaf *l = (struct leaf *) n; 1067 l = (struct leaf *) n;
1058
1059 li = leaf_info_new(plen); 1068 li = leaf_info_new(plen);
1060 1069
1061 if (!li) { 1070 if (!li)
1062 *err = -ENOMEM; 1071 return NULL;
1063 goto err;
1064 }
1065 1072
1066 fa_head = &li->falh; 1073 fa_head = &li->falh;
1067 insert_leaf_info(&l->list, li); 1074 insert_leaf_info(&l->list, li);
1068 goto done; 1075 goto done;
1069 } 1076 }
1070 t->size++;
1071 l = leaf_new(); 1077 l = leaf_new();
1072 1078
1073 if (!l) { 1079 if (!l)
1074 *err = -ENOMEM; 1080 return NULL;
1075 goto err;
1076 }
1077 1081
1078 l->key = key; 1082 l->key = key;
1079 li = leaf_info_new(plen); 1083 li = leaf_info_new(plen);
1080 1084
1081 if (!li) { 1085 if (!li) {
1082 tnode_free((struct tnode *) l); 1086 tnode_free((struct tnode *) l);
1083 *err = -ENOMEM; 1087 return NULL;
1084 goto err;
1085 } 1088 }
1086 1089
1087 fa_head = &li->falh; 1090 fa_head = &li->falh;
@@ -1117,8 +1120,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1117 if (!tn) { 1120 if (!tn) {
1118 free_leaf_info(li); 1121 free_leaf_info(li);
1119 tnode_free((struct tnode *) l); 1122 tnode_free((struct tnode *) l);
1120 *err = -ENOMEM; 1123 return NULL;
1121 goto err;
1122 } 1124 }
1123 1125
1124 node_set_parent((struct node *)tn, tp); 1126 node_set_parent((struct node *)tn, tp);
@@ -1129,23 +1131,23 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1129 1131
1130 if (tp) { 1132 if (tp) {
1131 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1133 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1132 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 1134 put_child(t, (struct tnode *)tp, cindex,
1135 (struct node *)tn);
1133 } else { 1136 } else {
1134 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ 1137 rcu_assign_pointer(t->trie, (struct node *)tn);
1135 tp = tn; 1138 tp = tn;
1136 } 1139 }
1137 } 1140 }
1138 1141
1139 if (tp && tp->pos + tp->bits > 32) 1142 if (tp && tp->pos + tp->bits > 32)
1140 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1143 pr_warning("fib_trie"
1141 tp, tp->pos, tp->bits, key, plen); 1144 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1145 tp, tp->pos, tp->bits, key, plen);
1142 1146
1143 /* Rebalance the trie */ 1147 /* Rebalance the trie */
1144 1148
1145 rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1149 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1146done: 1150done:
1147 t->revision++;
1148err:
1149 return fa_head; 1151 return fa_head;
1150} 1152}
1151 1153
@@ -1203,17 +1205,45 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1203 * and we need to allocate a new one of those as well. 1205 * and we need to allocate a new one of those as well.
1204 */ 1206 */
1205 1207
1206 if (fa && fa->fa_info->fib_priority == fi->fib_priority) { 1208 if (fa && fa->fa_tos == tos &&
1207 struct fib_alias *fa_orig; 1209 fa->fa_info->fib_priority == fi->fib_priority) {
1210 struct fib_alias *fa_first, *fa_match;
1208 1211
1209 err = -EEXIST; 1212 err = -EEXIST;
1210 if (cfg->fc_nlflags & NLM_F_EXCL) 1213 if (cfg->fc_nlflags & NLM_F_EXCL)
1211 goto out; 1214 goto out;
1212 1215
1216 /* We have 2 goals:
1217 * 1. Find exact match for type, scope, fib_info to avoid
1218 * duplicate routes
1219 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1220 */
1221 fa_match = NULL;
1222 fa_first = fa;
1223 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1224 list_for_each_entry_continue(fa, fa_head, fa_list) {
1225 if (fa->fa_tos != tos)
1226 break;
1227 if (fa->fa_info->fib_priority != fi->fib_priority)
1228 break;
1229 if (fa->fa_type == cfg->fc_type &&
1230 fa->fa_scope == cfg->fc_scope &&
1231 fa->fa_info == fi) {
1232 fa_match = fa;
1233 break;
1234 }
1235 }
1236
1213 if (cfg->fc_nlflags & NLM_F_REPLACE) { 1237 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1214 struct fib_info *fi_drop; 1238 struct fib_info *fi_drop;
1215 u8 state; 1239 u8 state;
1216 1240
1241 fa = fa_first;
1242 if (fa_match) {
1243 if (fa == fa_match)
1244 err = 0;
1245 goto out;
1246 }
1217 err = -ENOBUFS; 1247 err = -ENOBUFS;
1218 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 1248 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1219 if (new_fa == NULL) 1249 if (new_fa == NULL)
@@ -1225,7 +1255,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1225 new_fa->fa_type = cfg->fc_type; 1255 new_fa->fa_type = cfg->fc_type;
1226 new_fa->fa_scope = cfg->fc_scope; 1256 new_fa->fa_scope = cfg->fc_scope;
1227 state = fa->fa_state; 1257 state = fa->fa_state;
1228 new_fa->fa_state &= ~FA_S_ACCESSED; 1258 new_fa->fa_state = state & ~FA_S_ACCESSED;
1229 1259
1230 list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 1260 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1231 alias_free_mem_rcu(fa); 1261 alias_free_mem_rcu(fa);
@@ -1242,20 +1272,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1242 * uses the same scope, type, and nexthop 1272 * uses the same scope, type, and nexthop
1243 * information. 1273 * information.
1244 */ 1274 */
1245 fa_orig = fa; 1275 if (fa_match)
1246 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 1276 goto out;
1247 if (fa->fa_tos != tos) 1277
1248 break;
1249 if (fa->fa_info->fib_priority != fi->fib_priority)
1250 break;
1251 if (fa->fa_type == cfg->fc_type &&
1252 fa->fa_scope == cfg->fc_scope &&
1253 fa->fa_info == fi) {
1254 goto out;
1255 }
1256 }
1257 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1278 if (!(cfg->fc_nlflags & NLM_F_APPEND))
1258 fa = fa_orig; 1279 fa = fa_first;
1259 } 1280 }
1260 err = -ENOENT; 1281 err = -ENOENT;
1261 if (!(cfg->fc_nlflags & NLM_F_CREATE)) 1282 if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1276,10 +1297,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1276 */ 1297 */
1277 1298
1278 if (!fa_head) { 1299 if (!fa_head) {
1279 err = 0; 1300 fa_head = fib_insert_node(t, key, plen);
1280 fa_head = fib_insert_node(t, &err, key, plen); 1301 if (unlikely(!fa_head)) {
1281 if (err) 1302 err = -ENOMEM;
1282 goto out_free_new_fa; 1303 goto out_free_new_fa;
1304 }
1283 } 1305 }
1284 1306
1285 list_add_tail_rcu(&new_fa->fa_list, 1307 list_add_tail_rcu(&new_fa->fa_list,
@@ -1299,40 +1321,41 @@ err:
1299 return err; 1321 return err;
1300} 1322}
1301 1323
1302
1303/* should be called with rcu_read_lock */ 1324/* should be called with rcu_read_lock */
1304static inline int check_leaf(struct trie *t, struct leaf *l, 1325static int check_leaf(struct trie *t, struct leaf *l,
1305 t_key key, int *plen, const struct flowi *flp, 1326 t_key key, const struct flowi *flp,
1306 struct fib_result *res) 1327 struct fib_result *res)
1307{ 1328{
1308 int err, i;
1309 __be32 mask;
1310 struct leaf_info *li; 1329 struct leaf_info *li;
1311 struct hlist_head *hhead = &l->list; 1330 struct hlist_head *hhead = &l->list;
1312 struct hlist_node *node; 1331 struct hlist_node *node;
1313 1332
1314 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1333 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1315 i = li->plen; 1334 int err;
1316 mask = inet_make_mask(i); 1335 int plen = li->plen;
1336 __be32 mask = inet_make_mask(plen);
1337
1317 if (l->key != (key & ntohl(mask))) 1338 if (l->key != (key & ntohl(mask)))
1318 continue; 1339 continue;
1319 1340
1320 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { 1341 err = fib_semantic_match(&li->falh, flp, res,
1321 *plen = i; 1342 htonl(l->key), mask, plen);
1343
1322#ifdef CONFIG_IP_FIB_TRIE_STATS 1344#ifdef CONFIG_IP_FIB_TRIE_STATS
1345 if (err <= 0)
1323 t->stats.semantic_match_passed++; 1346 t->stats.semantic_match_passed++;
1347 else
1348 t->stats.semantic_match_miss++;
1324#endif 1349#endif
1325 return err; 1350 if (err <= 0)
1326 } 1351 return plen;
1327#ifdef CONFIG_IP_FIB_TRIE_STATS
1328 t->stats.semantic_match_miss++;
1329#endif
1330 } 1352 }
1331 return 1; 1353
1354 return -1;
1332} 1355}
1333 1356
1334static int 1357static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1335fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1358 struct fib_result *res)
1336{ 1359{
1337 struct trie *t = (struct trie *) tb->tb_data; 1360 struct trie *t = (struct trie *) tb->tb_data;
1338 int plen, ret = 0; 1361 int plen, ret = 0;
@@ -1359,10 +1382,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1359 1382
1360 /* Just a leaf? */ 1383 /* Just a leaf? */
1361 if (IS_LEAF(n)) { 1384 if (IS_LEAF(n)) {
1362 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 1385 plen = check_leaf(t, (struct leaf *)n, key, flp, res);
1363 goto found; 1386 if (plen < 0)
1364 goto failed; 1387 goto failed;
1388 ret = 0;
1389 goto found;
1365 } 1390 }
1391
1366 pn = (struct tnode *) n; 1392 pn = (struct tnode *) n;
1367 chopped_off = 0; 1393 chopped_off = 0;
1368 1394
@@ -1384,14 +1410,14 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1384 } 1410 }
1385 1411
1386 if (IS_LEAF(n)) { 1412 if (IS_LEAF(n)) {
1387 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 1413 plen = check_leaf(t, (struct leaf *)n, key, flp, res);
1388 goto found; 1414 if (plen < 0)
1389 else
1390 goto backtrace; 1415 goto backtrace;
1416
1417 ret = 0;
1418 goto found;
1391 } 1419 }
1392 1420
1393#define HL_OPTIMIZE
1394#ifdef HL_OPTIMIZE
1395 cn = (struct tnode *)n; 1421 cn = (struct tnode *)n;
1396 1422
1397 /* 1423 /*
@@ -1420,12 +1446,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1420 * *are* zero. 1446 * *are* zero.
1421 */ 1447 */
1422 1448
1423 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 1449 /* NOTA BENE: Checking only skipped bits
1450 for the new node here */
1424 1451
1425 if (current_prefix_length < pos+bits) { 1452 if (current_prefix_length < pos+bits) {
1426 if (tkey_extract_bits(cn->key, current_prefix_length, 1453 if (tkey_extract_bits(cn->key, current_prefix_length,
1427 cn->pos - current_prefix_length) != 0 || 1454 cn->pos - current_prefix_length)
1428 !(cn->child[0])) 1455 || !(cn->child[0]))
1429 goto backtrace; 1456 goto backtrace;
1430 } 1457 }
1431 1458
@@ -1448,14 +1475,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1448 * new tnode's key. 1475 * new tnode's key.
1449 */ 1476 */
1450 1477
1451 /* Note: We aren't very concerned about the piece of the key 1478 /*
1452 * that precede pn->pos+pn->bits, since these have already been 1479 * Note: We aren't very concerned about the piece of
1453 * checked. The bits after cn->pos aren't checked since these are 1480 * the key that precede pn->pos+pn->bits, since these
1454 * by definition "unknown" at this point. Thus, what we want to 1481 * have already been checked. The bits after cn->pos
1455 * see is if we are about to enter the "prefix matching" state, 1482 * aren't checked since these are by definition
1456 * and in that case verify that the skipped bits that will prevail 1483 * "unknown" at this point. Thus, what we want to see
1457 * throughout this subtree are zero, as they have to be if we are 1484 * is if we are about to enter the "prefix matching"
1458 * to find a matching prefix. 1485 * state, and in that case verify that the skipped
1486 * bits that will prevail throughout this subtree are
1487 * zero, as they have to be if we are to find a
1488 * matching prefix.
1459 */ 1489 */
1460 1490
1461 node_prefix = mask_pfx(cn->key, cn->pos); 1491 node_prefix = mask_pfx(cn->key, cn->pos);
@@ -1463,13 +1493,15 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1463 pref_mismatch = key_prefix^node_prefix; 1493 pref_mismatch = key_prefix^node_prefix;
1464 mp = 0; 1494 mp = 0;
1465 1495
1466 /* In short: If skipped bits in this node do not match the search 1496 /*
1467 * key, enter the "prefix matching" state.directly. 1497 * In short: If skipped bits in this node do not match
1498 * the search key, enter the "prefix matching"
1499 * state.directly.
1468 */ 1500 */
1469 if (pref_mismatch) { 1501 if (pref_mismatch) {
1470 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1502 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1471 mp++; 1503 mp++;
1472 pref_mismatch = pref_mismatch <<1; 1504 pref_mismatch = pref_mismatch << 1;
1473 } 1505 }
1474 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 1506 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1475 1507
@@ -1479,7 +1511,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1479 if (current_prefix_length >= cn->pos) 1511 if (current_prefix_length >= cn->pos)
1480 current_prefix_length = mp; 1512 current_prefix_length = mp;
1481 } 1513 }
1482#endif 1514
1483 pn = (struct tnode *)n; /* Descend */ 1515 pn = (struct tnode *)n; /* Descend */
1484 chopped_off = 0; 1516 chopped_off = 0;
1485 continue; 1517 continue;
@@ -1488,12 +1520,14 @@ backtrace:
1488 chopped_off++; 1520 chopped_off++;
1489 1521
1490 /* As zero don't change the child key (cindex) */ 1522 /* As zero don't change the child key (cindex) */
1491 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) 1523 while ((chopped_off <= pn->bits)
1524 && !(cindex & (1<<(chopped_off-1))))
1492 chopped_off++; 1525 chopped_off++;
1493 1526
1494 /* Decrease current_... with bits chopped off */ 1527 /* Decrease current_... with bits chopped off */
1495 if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1528 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1496 current_prefix_length = pn->pos + pn->bits - chopped_off; 1529 current_prefix_length = pn->pos + pn->bits
1530 - chopped_off;
1497 1531
1498 /* 1532 /*
1499 * Either we do the actual chop off according or if we have 1533 * Either we do the actual chop off according or if we have
@@ -1525,52 +1559,23 @@ found:
1525 return ret; 1559 return ret;
1526} 1560}
1527 1561
1528/* only called from updater side */ 1562/*
1529static int trie_leaf_remove(struct trie *t, t_key key) 1563 * Remove the leaf and return parent.
1564 */
1565static void trie_leaf_remove(struct trie *t, struct leaf *l)
1530{ 1566{
1531 t_key cindex; 1567 struct tnode *tp = node_parent((struct node *) l);
1532 struct tnode *tp = NULL;
1533 struct node *n = t->trie;
1534 struct leaf *l;
1535 1568
1536 pr_debug("entering trie_leaf_remove(%p)\n", n); 1569 pr_debug("entering trie_leaf_remove(%p)\n", l);
1537
1538 /* Note that in the case skipped bits, those bits are *not* checked!
1539 * When we finish this, we will have NULL or a T_LEAF, and the
1540 * T_LEAF may or may not match our key.
1541 */
1542
1543 while (n != NULL && IS_TNODE(n)) {
1544 struct tnode *tn = (struct tnode *) n;
1545 check_tnode(tn);
1546 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1547
1548 BUG_ON(n && node_parent(n) != tn);
1549 }
1550 l = (struct leaf *) n;
1551
1552 if (!n || !tkey_equals(l->key, key))
1553 return 0;
1554
1555 /*
1556 * Key found.
1557 * Remove the leaf and rebalance the tree
1558 */
1559
1560 t->revision++;
1561 t->size--;
1562
1563 tp = node_parent(n);
1564 tnode_free((struct tnode *) n);
1565 1570
1566 if (tp) { 1571 if (tp) {
1567 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1572 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
1568 put_child(t, (struct tnode *)tp, cindex, NULL); 1573 put_child(t, (struct tnode *)tp, cindex, NULL);
1569 rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1574 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1570 } else 1575 } else
1571 rcu_assign_pointer(t->trie, NULL); 1576 rcu_assign_pointer(t->trie, NULL);
1572 1577
1573 return 1; 1578 tnode_free((struct tnode *) l);
1574} 1579}
1575 1580
1576/* 1581/*
@@ -1611,9 +1616,8 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1611 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 1616 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1612 1617
1613 fa_to_delete = NULL; 1618 fa_to_delete = NULL;
1614 fa_head = fa->fa_list.prev; 1619 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1615 1620 list_for_each_entry_continue(fa, fa_head, fa_list) {
1616 list_for_each_entry(fa, fa_head, fa_list) {
1617 struct fib_info *fi = fa->fa_info; 1621 struct fib_info *fi = fa->fa_info;
1618 1622
1619 if (fa->fa_tos != tos) 1623 if (fa->fa_tos != tos)
@@ -1648,7 +1652,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1648 } 1652 }
1649 1653
1650 if (hlist_empty(&l->list)) 1654 if (hlist_empty(&l->list))
1651 trie_leaf_remove(t, key); 1655 trie_leaf_remove(t, l);
1652 1656
1653 if (fa->fa_state & FA_S_ACCESSED) 1657 if (fa->fa_state & FA_S_ACCESSED)
1654 rt_cache_flush(-1); 1658 rt_cache_flush(-1);
@@ -1694,96 +1698,106 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
1694 return found; 1698 return found;
1695} 1699}
1696 1700
1697/* rcu_read_lock needs to be hold by caller from readside */ 1701/*
1698 1702 * Scan for the next right leaf starting at node p->child[idx]
1699static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 1703 * Since we have back pointer, no recursion necessary.
1704 */
1705static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1700{ 1706{
1701 struct node *c = (struct node *) thisleaf; 1707 do {
1702 struct tnode *p; 1708 t_key idx;
1703 int idx;
1704 struct node *trie = rcu_dereference(t->trie);
1705 1709
1706 if (c == NULL) {
1707 if (trie == NULL)
1708 return NULL;
1709
1710 if (IS_LEAF(trie)) /* trie w. just a leaf */
1711 return (struct leaf *) trie;
1712
1713 p = (struct tnode*) trie; /* Start */
1714 } else
1715 p = node_parent(c);
1716
1717 while (p) {
1718 int pos, last;
1719
1720 /* Find the next child of the parent */
1721 if (c) 1710 if (c)
1722 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 1711 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
1723 else 1712 else
1724 pos = 0; 1713 idx = 0;
1725
1726 last = 1 << p->bits;
1727 for (idx = pos; idx < last ; idx++) {
1728 c = rcu_dereference(p->child[idx]);
1729 1714
1715 while (idx < 1u << p->bits) {
1716 c = tnode_get_child_rcu(p, idx++);
1730 if (!c) 1717 if (!c)
1731 continue; 1718 continue;
1732 1719
1733 /* Decend if tnode */ 1720 if (IS_LEAF(c)) {
1734 while (IS_TNODE(c)) { 1721 prefetch(p->child[idx]);
1735 p = (struct tnode *) c; 1722 return (struct leaf *) c;
1736 idx = 0;
1737
1738 /* Rightmost non-NULL branch */
1739 if (p && IS_TNODE(p))
1740 while (!(c = rcu_dereference(p->child[idx]))
1741 && idx < (1<<p->bits)) idx++;
1742
1743 /* Done with this tnode? */
1744 if (idx >= (1 << p->bits) || !c)
1745 goto up;
1746 } 1723 }
1747 return (struct leaf *) c; 1724
1725 /* Rescan start scanning in new node */
1726 p = (struct tnode *) c;
1727 idx = 0;
1748 } 1728 }
1749up: 1729
1750 /* No more children go up one step */ 1730 /* Node empty, walk back up to parent */
1751 c = (struct node *) p; 1731 c = (struct node *) p;
1752 p = node_parent(c); 1732 } while ( (p = node_parent_rcu(c)) != NULL);
1733
1734 return NULL; /* Root of trie */
1735}
1736
1737static struct leaf *trie_firstleaf(struct trie *t)
1738{
1739 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1740
1741 if (!n)
1742 return NULL;
1743
1744 if (IS_LEAF(n)) /* trie is just a leaf */
1745 return (struct leaf *) n;
1746
1747 return leaf_walk_rcu(n, NULL);
1748}
1749
1750static struct leaf *trie_nextleaf(struct leaf *l)
1751{
1752 struct node *c = (struct node *) l;
1753 struct tnode *p = node_parent(c);
1754
1755 if (!p)
1756 return NULL; /* trie with just one leaf */
1757
1758 return leaf_walk_rcu(p, c);
1759}
1760
1761static struct leaf *trie_leafindex(struct trie *t, int index)
1762{
1763 struct leaf *l = trie_firstleaf(t);
1764
1765 while (index-- > 0) {
1766 l = trie_nextleaf(l);
1767 if (!l)
1768 break;
1753 } 1769 }
1754 return NULL; /* Ready. Root of trie */ 1770 return l;
1755} 1771}
1756 1772
1773
1757/* 1774/*
1758 * Caller must hold RTNL. 1775 * Caller must hold RTNL.
1759 */ 1776 */
1760static int fn_trie_flush(struct fib_table *tb) 1777static int fn_trie_flush(struct fib_table *tb)
1761{ 1778{
1762 struct trie *t = (struct trie *) tb->tb_data; 1779 struct trie *t = (struct trie *) tb->tb_data;
1763 struct leaf *ll = NULL, *l = NULL; 1780 struct leaf *l, *ll = NULL;
1764 int found = 0, h; 1781 int found = 0;
1765
1766 t->revision++;
1767 1782
1768 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1783 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1769 found += trie_flush_leaf(t, l); 1784 found += trie_flush_leaf(t, l);
1770 1785
1771 if (ll && hlist_empty(&ll->list)) 1786 if (ll && hlist_empty(&ll->list))
1772 trie_leaf_remove(t, ll->key); 1787 trie_leaf_remove(t, ll);
1773 ll = l; 1788 ll = l;
1774 } 1789 }
1775 1790
1776 if (ll && hlist_empty(&ll->list)) 1791 if (ll && hlist_empty(&ll->list))
1777 trie_leaf_remove(t, ll->key); 1792 trie_leaf_remove(t, ll);
1778 1793
1779 pr_debug("trie_flush found=%d\n", found); 1794 pr_debug("trie_flush found=%d\n", found);
1780 return found; 1795 return found;
1781} 1796}
1782 1797
1783static int trie_last_dflt = -1; 1798static void fn_trie_select_default(struct fib_table *tb,
1784 1799 const struct flowi *flp,
1785static void 1800 struct fib_result *res)
1786fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1787{ 1801{
1788 struct trie *t = (struct trie *) tb->tb_data; 1802 struct trie *t = (struct trie *) tb->tb_data;
1789 int order, last_idx; 1803 int order, last_idx;
@@ -1828,51 +1842,41 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
1828 if (next_fi != res->fi) 1842 if (next_fi != res->fi)
1829 break; 1843 break;
1830 } else if (!fib_detect_death(fi, order, &last_resort, 1844 } else if (!fib_detect_death(fi, order, &last_resort,
1831 &last_idx, &trie_last_dflt)) { 1845 &last_idx, tb->tb_default)) {
1832 if (res->fi) 1846 fib_result_assign(res, fi);
1833 fib_info_put(res->fi); 1847 tb->tb_default = order;
1834 res->fi = fi;
1835 atomic_inc(&fi->fib_clntref);
1836 trie_last_dflt = order;
1837 goto out; 1848 goto out;
1838 } 1849 }
1839 fi = next_fi; 1850 fi = next_fi;
1840 order++; 1851 order++;
1841 } 1852 }
1842 if (order <= 0 || fi == NULL) { 1853 if (order <= 0 || fi == NULL) {
1843 trie_last_dflt = -1; 1854 tb->tb_default = -1;
1844 goto out; 1855 goto out;
1845 } 1856 }
1846 1857
1847 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { 1858 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1848 if (res->fi) 1859 tb->tb_default)) {
1849 fib_info_put(res->fi); 1860 fib_result_assign(res, fi);
1850 res->fi = fi; 1861 tb->tb_default = order;
1851 atomic_inc(&fi->fib_clntref);
1852 trie_last_dflt = order;
1853 goto out; 1862 goto out;
1854 } 1863 }
1855 if (last_idx >= 0) { 1864 if (last_idx >= 0)
1856 if (res->fi) 1865 fib_result_assign(res, last_resort);
1857 fib_info_put(res->fi); 1866 tb->tb_default = last_idx;
1858 res->fi = last_resort; 1867out:
1859 if (last_resort)
1860 atomic_inc(&last_resort->fib_clntref);
1861 }
1862 trie_last_dflt = last_idx;
1863 out:;
1864 rcu_read_unlock(); 1868 rcu_read_unlock();
1865} 1869}
1866 1870
1867static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 1871static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1872 struct fib_table *tb,
1868 struct sk_buff *skb, struct netlink_callback *cb) 1873 struct sk_buff *skb, struct netlink_callback *cb)
1869{ 1874{
1870 int i, s_i; 1875 int i, s_i;
1871 struct fib_alias *fa; 1876 struct fib_alias *fa;
1872
1873 __be32 xkey = htonl(key); 1877 __be32 xkey = htonl(key);
1874 1878
1875 s_i = cb->args[4]; 1879 s_i = cb->args[5];
1876 i = 0; 1880 i = 0;
1877 1881
1878 /* rcu_read_lock is hold by caller */ 1882 /* rcu_read_lock is hold by caller */
@@ -1882,7 +1886,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1882 i++; 1886 i++;
1883 continue; 1887 continue;
1884 } 1888 }
1885 BUG_ON(!fa->fa_info);
1886 1889
1887 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1890 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1888 cb->nlh->nlmsg_seq, 1891 cb->nlh->nlmsg_seq,
@@ -1893,119 +1896,130 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1893 xkey, 1896 xkey,
1894 plen, 1897 plen,
1895 fa->fa_tos, 1898 fa->fa_tos,
1896 fa->fa_info, 0) < 0) { 1899 fa->fa_info, NLM_F_MULTI) < 0) {
1897 cb->args[4] = i; 1900 cb->args[5] = i;
1898 return -1; 1901 return -1;
1899 } 1902 }
1900 i++; 1903 i++;
1901 } 1904 }
1902 cb->args[4] = i; 1905 cb->args[5] = i;
1903 return skb->len; 1906 return skb->len;
1904} 1907}
1905 1908
1906static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 1909static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1907 struct netlink_callback *cb) 1910 struct sk_buff *skb, struct netlink_callback *cb)
1908{ 1911{
1909 int h, s_h; 1912 struct leaf_info *li;
1910 struct list_head *fa_head; 1913 struct hlist_node *node;
1911 struct leaf *l = NULL; 1914 int i, s_i;
1912 1915
1913 s_h = cb->args[3]; 1916 s_i = cb->args[4];
1917 i = 0;
1914 1918
1915 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1919 /* rcu_read_lock is hold by caller */
1916 if (h < s_h) 1920 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1921 if (i < s_i) {
1922 i++;
1917 continue; 1923 continue;
1918 if (h > s_h) 1924 }
1919 memset(&cb->args[4], 0,
1920 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1921 1925
1922 fa_head = get_fa_head(l, plen); 1926 if (i > s_i)
1927 cb->args[5] = 0;
1923 1928
1924 if (!fa_head) 1929 if (list_empty(&li->falh))
1925 continue; 1930 continue;
1926 1931
1927 if (list_empty(fa_head)) 1932 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
1928 continue; 1933 cb->args[4] = i;
1929
1930 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1931 cb->args[3] = h;
1932 return -1; 1934 return -1;
1933 } 1935 }
1936 i++;
1934 } 1937 }
1935 cb->args[3] = h; 1938
1939 cb->args[4] = i;
1936 return skb->len; 1940 return skb->len;
1937} 1941}
1938 1942
1939static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 1943static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1944 struct netlink_callback *cb)
1940{ 1945{
1941 int m, s_m; 1946 struct leaf *l;
1942 struct trie *t = (struct trie *) tb->tb_data; 1947 struct trie *t = (struct trie *) tb->tb_data;
1943 1948 t_key key = cb->args[2];
1944 s_m = cb->args[2]; 1949 int count = cb->args[3];
1945 1950
1946 rcu_read_lock(); 1951 rcu_read_lock();
1947 for (m = 0; m <= 32; m++) { 1952 /* Dump starting at last key.
1948 if (m < s_m) 1953 * Note: 0.0.0.0/0 (ie default) is first key.
1949 continue; 1954 */
1950 if (m > s_m) 1955 if (count == 0)
1951 memset(&cb->args[3], 0, 1956 l = trie_firstleaf(t);
1952 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1957 else {
1958 /* Normally, continue from last key, but if that is missing
1959 * fallback to using slow rescan
1960 */
1961 l = fib_find_node(t, key);
1962 if (!l)
1963 l = trie_leafindex(t, count);
1964 }
1953 1965
1954 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1966 while (l) {
1955 cb->args[2] = m; 1967 cb->args[2] = l->key;
1956 goto out; 1968 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
1969 cb->args[3] = count;
1970 rcu_read_unlock();
1971 return -1;
1957 } 1972 }
1973
1974 ++count;
1975 l = trie_nextleaf(l);
1976 memset(&cb->args[4], 0,
1977 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1958 } 1978 }
1979 cb->args[3] = count;
1959 rcu_read_unlock(); 1980 rcu_read_unlock();
1960 cb->args[2] = m; 1981
1961 return skb->len; 1982 return skb->len;
1962out:
1963 rcu_read_unlock();
1964 return -1;
1965} 1983}
1966 1984
1967/* Fix more generic FIB names for init later */ 1985void __init fib_hash_init(void)
1986{
1987 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1988 sizeof(struct fib_alias),
1989 0, SLAB_PANIC, NULL);
1968 1990
1969#ifdef CONFIG_IP_MULTIPLE_TABLES 1991 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1970struct fib_table * fib_hash_init(u32 id) 1992 max(sizeof(struct leaf),
1971#else 1993 sizeof(struct leaf_info)),
1972struct fib_table * __init fib_hash_init(u32 id) 1994 0, SLAB_PANIC, NULL);
1973#endif 1995}
1996
1997
1998/* Fix more generic FIB names for init later */
1999struct fib_table *fib_hash_table(u32 id)
1974{ 2000{
1975 struct fib_table *tb; 2001 struct fib_table *tb;
1976 struct trie *t; 2002 struct trie *t;
1977 2003
1978 if (fn_alias_kmem == NULL)
1979 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1980 sizeof(struct fib_alias),
1981 0, SLAB_HWCACHE_ALIGN,
1982 NULL);
1983
1984 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 2004 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1985 GFP_KERNEL); 2005 GFP_KERNEL);
1986 if (tb == NULL) 2006 if (tb == NULL)
1987 return NULL; 2007 return NULL;
1988 2008
1989 tb->tb_id = id; 2009 tb->tb_id = id;
2010 tb->tb_default = -1;
1990 tb->tb_lookup = fn_trie_lookup; 2011 tb->tb_lookup = fn_trie_lookup;
1991 tb->tb_insert = fn_trie_insert; 2012 tb->tb_insert = fn_trie_insert;
1992 tb->tb_delete = fn_trie_delete; 2013 tb->tb_delete = fn_trie_delete;
1993 tb->tb_flush = fn_trie_flush; 2014 tb->tb_flush = fn_trie_flush;
1994 tb->tb_select_default = fn_trie_select_default; 2015 tb->tb_select_default = fn_trie_select_default;
1995 tb->tb_dump = fn_trie_dump; 2016 tb->tb_dump = fn_trie_dump;
1996 memset(tb->tb_data, 0, sizeof(struct trie));
1997 2017
1998 t = (struct trie *) tb->tb_data; 2018 t = (struct trie *) tb->tb_data;
1999 2019 memset(t, 0, sizeof(*t));
2000 trie_init(t);
2001
2002 if (id == RT_TABLE_LOCAL)
2003 trie_local = t;
2004 else if (id == RT_TABLE_MAIN)
2005 trie_main = t;
2006 2020
2007 if (id == RT_TABLE_LOCAL) 2021 if (id == RT_TABLE_LOCAL)
2008 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); 2022 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2009 2023
2010 return tb; 2024 return tb;
2011} 2025}
@@ -2013,6 +2027,8 @@ struct fib_table * __init fib_hash_init(u32 id)
2013#ifdef CONFIG_PROC_FS 2027#ifdef CONFIG_PROC_FS
2014/* Depth first Trie walk iterator */ 2028/* Depth first Trie walk iterator */
2015struct fib_trie_iter { 2029struct fib_trie_iter {
2030 struct seq_net_private p;
2031 struct trie *trie_local, *trie_main;
2016 struct tnode *tnode; 2032 struct tnode *tnode;
2017 struct trie *trie; 2033 struct trie *trie;
2018 unsigned index; 2034 unsigned index;
@@ -2033,7 +2049,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2033 iter->tnode, iter->index, iter->depth); 2049 iter->tnode, iter->index, iter->depth);
2034rescan: 2050rescan:
2035 while (cindex < (1<<tn->bits)) { 2051 while (cindex < (1<<tn->bits)) {
2036 struct node *n = tnode_get_child(tn, cindex); 2052 struct node *n = tnode_get_child_rcu(tn, cindex);
2037 2053
2038 if (n) { 2054 if (n) {
2039 if (IS_LEAF(n)) { 2055 if (IS_LEAF(n)) {
@@ -2052,7 +2068,7 @@ rescan:
2052 } 2068 }
2053 2069
2054 /* Current node exhausted, pop back up */ 2070 /* Current node exhausted, pop back up */
2055 p = node_parent((struct node *)tn); 2071 p = node_parent_rcu((struct node *)tn);
2056 if (p) { 2072 if (p) {
2057 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2073 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2058 tn = p; 2074 tn = p;
@@ -2105,10 +2121,17 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2105 for (n = fib_trie_get_first(&iter, t); n; 2121 for (n = fib_trie_get_first(&iter, t); n;
2106 n = fib_trie_get_next(&iter)) { 2122 n = fib_trie_get_next(&iter)) {
2107 if (IS_LEAF(n)) { 2123 if (IS_LEAF(n)) {
2124 struct leaf *l = (struct leaf *)n;
2125 struct leaf_info *li;
2126 struct hlist_node *tmp;
2127
2108 s->leaves++; 2128 s->leaves++;
2109 s->totdepth += iter.depth; 2129 s->totdepth += iter.depth;
2110 if (iter.depth > s->maxdepth) 2130 if (iter.depth > s->maxdepth)
2111 s->maxdepth = iter.depth; 2131 s->maxdepth = iter.depth;
2132
2133 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2134 ++s->prefixes;
2112 } else { 2135 } else {
2113 const struct tnode *tn = (const struct tnode *) n; 2136 const struct tnode *tn = (const struct tnode *) n;
2114 int i; 2137 int i;
@@ -2137,13 +2160,17 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2137 else 2160 else
2138 avdepth = 0; 2161 avdepth = 0;
2139 2162
2140 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2163 seq_printf(seq, "\tAver depth: %u.%02d\n",
2164 avdepth / 100, avdepth % 100);
2141 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2165 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2142 2166
2143 seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2167 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2144
2145 bytes = sizeof(struct leaf) * stat->leaves; 2168 bytes = sizeof(struct leaf) * stat->leaves;
2146 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); 2169
2170 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2171 bytes += sizeof(struct leaf_info) * stat->prefixes;
2172
2173 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2147 bytes += sizeof(struct tnode) * stat->tnodes; 2174 bytes += sizeof(struct tnode) * stat->tnodes;
2148 2175
2149 max = MAX_STAT_DEPTH; 2176 max = MAX_STAT_DEPTH;
@@ -2153,60 +2180,89 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2153 pointers = 0; 2180 pointers = 0;
2154 for (i = 1; i <= max; i++) 2181 for (i = 1; i <= max; i++)
2155 if (stat->nodesizes[i] != 0) { 2182 if (stat->nodesizes[i] != 0) {
2156 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2183 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2157 pointers += (1<<i) * stat->nodesizes[i]; 2184 pointers += (1<<i) * stat->nodesizes[i];
2158 } 2185 }
2159 seq_putc(seq, '\n'); 2186 seq_putc(seq, '\n');
2160 seq_printf(seq, "\tPointers: %d\n", pointers); 2187 seq_printf(seq, "\tPointers: %u\n", pointers);
2161 2188
2162 bytes += sizeof(struct node *) * pointers; 2189 bytes += sizeof(struct node *) * pointers;
2163 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2190 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2164 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); 2191 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2192}
2165 2193
2166#ifdef CONFIG_IP_FIB_TRIE_STATS 2194#ifdef CONFIG_IP_FIB_TRIE_STATS
2167 seq_printf(seq, "Counters:\n---------\n"); 2195static void trie_show_usage(struct seq_file *seq,
2168 seq_printf(seq,"gets = %d\n", t->stats.gets); 2196 const struct trie_use_stats *stats)
2169 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 2197{
2170 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2198 seq_printf(seq, "\nCounters:\n---------\n");
2171 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2199 seq_printf(seq, "gets = %u\n", stats->gets);
2172 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2200 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2173 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); 2201 seq_printf(seq, "semantic match passed = %u\n",
2174#ifdef CLEAR_STATS 2202 stats->semantic_match_passed);
2175 memset(&(t->stats), 0, sizeof(t->stats)); 2203 seq_printf(seq, "semantic match miss = %u\n",
2176#endif 2204 stats->semantic_match_miss);
2205 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2206 seq_printf(seq, "skipped node resize = %u\n\n",
2207 stats->resize_node_skipped);
2208}
2177#endif /* CONFIG_IP_FIB_TRIE_STATS */ 2209#endif /* CONFIG_IP_FIB_TRIE_STATS */
2210
2211static void fib_trie_show(struct seq_file *seq, const char *name,
2212 struct trie *trie)
2213{
2214 struct trie_stat stat;
2215
2216 trie_collect_stats(trie, &stat);
2217 seq_printf(seq, "%s:\n", name);
2218 trie_show_stats(seq, &stat);
2219#ifdef CONFIG_IP_FIB_TRIE_STATS
2220 trie_show_usage(seq, &trie->stats);
2221#endif
2178} 2222}
2179 2223
2180static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2224static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2181{ 2225{
2182 struct trie_stat *stat; 2226 struct net *net = (struct net *)seq->private;
2183 2227 struct fib_table *tb;
2184 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2185 if (!stat)
2186 return -ENOMEM;
2187 2228
2188 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2229 seq_printf(seq,
2230 "Basic info: size of leaf:"
2231 " %Zd bytes, size of tnode: %Zd bytes.\n",
2189 sizeof(struct leaf), sizeof(struct tnode)); 2232 sizeof(struct leaf), sizeof(struct tnode));
2190 2233
2191 if (trie_local) { 2234 tb = fib_get_table(net, RT_TABLE_LOCAL);
2192 seq_printf(seq, "Local:\n"); 2235 if (tb)
2193 trie_collect_stats(trie_local, stat); 2236 fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
2194 trie_show_stats(seq, stat);
2195 }
2196 2237
2197 if (trie_main) { 2238 tb = fib_get_table(net, RT_TABLE_MAIN);
2198 seq_printf(seq, "Main:\n"); 2239 if (tb)
2199 trie_collect_stats(trie_main, stat); 2240 fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
2200 trie_show_stats(seq, stat);
2201 }
2202 kfree(stat);
2203 2241
2204 return 0; 2242 return 0;
2205} 2243}
2206 2244
2207static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2245static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2208{ 2246{
2209 return single_open(file, fib_triestat_seq_show, NULL); 2247 int err;
2248 struct net *net;
2249
2250 net = get_proc_net(inode);
2251 if (net == NULL)
2252 return -ENXIO;
2253 err = single_open(file, fib_triestat_seq_show, net);
2254 if (err < 0) {
2255 put_net(net);
2256 return err;
2257 }
2258 return 0;
2259}
2260
2261static int fib_triestat_seq_release(struct inode *ino, struct file *f)
2262{
2263 struct seq_file *seq = f->private_data;
2264 put_net(seq->private);
2265 return single_release(ino, f);
2210} 2266}
2211 2267
2212static const struct file_operations fib_triestat_fops = { 2268static const struct file_operations fib_triestat_fops = {
@@ -2214,7 +2270,7 @@ static const struct file_operations fib_triestat_fops = {
2214 .open = fib_triestat_seq_open, 2270 .open = fib_triestat_seq_open,
2215 .read = seq_read, 2271 .read = seq_read,
2216 .llseek = seq_lseek, 2272 .llseek = seq_lseek,
2217 .release = single_release, 2273 .release = fib_triestat_seq_release,
2218}; 2274};
2219 2275
2220static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2276static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
@@ -2223,13 +2279,13 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2223 loff_t idx = 0; 2279 loff_t idx = 0;
2224 struct node *n; 2280 struct node *n;
2225 2281
2226 for (n = fib_trie_get_first(iter, trie_local); 2282 for (n = fib_trie_get_first(iter, iter->trie_local);
2227 n; ++idx, n = fib_trie_get_next(iter)) { 2283 n; ++idx, n = fib_trie_get_next(iter)) {
2228 if (pos == idx) 2284 if (pos == idx)
2229 return n; 2285 return n;
2230 } 2286 }
2231 2287
2232 for (n = fib_trie_get_first(iter, trie_main); 2288 for (n = fib_trie_get_first(iter, iter->trie_main);
2233 n; ++idx, n = fib_trie_get_next(iter)) { 2289 n; ++idx, n = fib_trie_get_next(iter)) {
2234 if (pos == idx) 2290 if (pos == idx)
2235 return n; 2291 return n;
@@ -2238,11 +2294,25 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2238} 2294}
2239 2295
2240static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2296static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2297 __acquires(RCU)
2241{ 2298{
2299 struct fib_trie_iter *iter = seq->private;
2300 struct fib_table *tb;
2301
2302 if (!iter->trie_local) {
2303 tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
2304 if (tb)
2305 iter->trie_local = (struct trie *) tb->tb_data;
2306 }
2307 if (!iter->trie_main) {
2308 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
2309 if (tb)
2310 iter->trie_main = (struct trie *) tb->tb_data;
2311 }
2242 rcu_read_lock(); 2312 rcu_read_lock();
2243 if (*pos == 0) 2313 if (*pos == 0)
2244 return SEQ_START_TOKEN; 2314 return SEQ_START_TOKEN;
2245 return fib_trie_get_idx(seq->private, *pos - 1); 2315 return fib_trie_get_idx(iter, *pos - 1);
2246} 2316}
2247 2317
2248static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2318static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -2260,13 +2330,14 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2260 return v; 2330 return v;
2261 2331
2262 /* continue scan in next trie */ 2332 /* continue scan in next trie */
2263 if (iter->trie == trie_local) 2333 if (iter->trie == iter->trie_local)
2264 return fib_trie_get_first(iter, trie_main); 2334 return fib_trie_get_first(iter, iter->trie_main);
2265 2335
2266 return NULL; 2336 return NULL;
2267} 2337}
2268 2338
2269static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2339static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2340 __releases(RCU)
2270{ 2341{
2271 rcu_read_unlock(); 2342 rcu_read_unlock();
2272} 2343}
@@ -2276,10 +2347,8 @@ static void seq_indent(struct seq_file *seq, int n)
2276 while (n-- > 0) seq_puts(seq, " "); 2347 while (n-- > 0) seq_puts(seq, " ");
2277} 2348}
2278 2349
2279static inline const char *rtn_scope(enum rt_scope_t s) 2350static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2280{ 2351{
2281 static char buf[32];
2282
2283 switch (s) { 2352 switch (s) {
2284 case RT_SCOPE_UNIVERSE: return "universe"; 2353 case RT_SCOPE_UNIVERSE: return "universe";
2285 case RT_SCOPE_SITE: return "site"; 2354 case RT_SCOPE_SITE: return "site";
@@ -2287,7 +2356,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
2287 case RT_SCOPE_HOST: return "host"; 2356 case RT_SCOPE_HOST: return "host";
2288 case RT_SCOPE_NOWHERE: return "nowhere"; 2357 case RT_SCOPE_NOWHERE: return "nowhere";
2289 default: 2358 default:
2290 snprintf(buf, sizeof(buf), "scope=%d", s); 2359 snprintf(buf, len, "scope=%d", s);
2291 return buf; 2360 return buf;
2292 } 2361 }
2293} 2362}
@@ -2307,13 +2376,11 @@ static const char *rtn_type_names[__RTN_MAX] = {
2307 [RTN_XRESOLVE] = "XRESOLVE", 2376 [RTN_XRESOLVE] = "XRESOLVE",
2308}; 2377};
2309 2378
2310static inline const char *rtn_type(unsigned t) 2379static inline const char *rtn_type(char *buf, size_t len, unsigned t)
2311{ 2380{
2312 static char buf[32];
2313
2314 if (t < __RTN_MAX && rtn_type_names[t]) 2381 if (t < __RTN_MAX && rtn_type_names[t])
2315 return rtn_type_names[t]; 2382 return rtn_type_names[t];
2316 snprintf(buf, sizeof(buf), "type %d", t); 2383 snprintf(buf, len, "type %u", t);
2317 return buf; 2384 return buf;
2318} 2385}
2319 2386
@@ -2326,8 +2393,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2326 if (v == SEQ_START_TOKEN) 2393 if (v == SEQ_START_TOKEN)
2327 return 0; 2394 return 0;
2328 2395
2329 if (!node_parent(n)) { 2396 if (!node_parent_rcu(n)) {
2330 if (iter->trie == trie_local) 2397 if (iter->trie == iter->trie_local)
2331 seq_puts(seq, "<local>:\n"); 2398 seq_puts(seq, "<local>:\n");
2332 else 2399 else
2333 seq_puts(seq, "<main>:\n"); 2400 seq_puts(seq, "<main>:\n");
@@ -2344,25 +2411,29 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2344 2411
2345 } else { 2412 } else {
2346 struct leaf *l = (struct leaf *) n; 2413 struct leaf *l = (struct leaf *) n;
2347 int i; 2414 struct leaf_info *li;
2415 struct hlist_node *node;
2348 __be32 val = htonl(l->key); 2416 __be32 val = htonl(l->key);
2349 2417
2350 seq_indent(seq, iter->depth); 2418 seq_indent(seq, iter->depth);
2351 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2419 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2352 for (i = 32; i >= 0; i--) { 2420
2353 struct leaf_info *li = find_leaf_info(l, i); 2421 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2354 if (li) { 2422 struct fib_alias *fa;
2355 struct fib_alias *fa; 2423
2356 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2424 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2357 seq_indent(seq, iter->depth+1); 2425 char buf1[32], buf2[32];
2358 seq_printf(seq, " /%d %s %s", i, 2426
2359 rtn_scope(fa->fa_scope), 2427 seq_indent(seq, iter->depth+1);
2360 rtn_type(fa->fa_type)); 2428 seq_printf(seq, " /%d %s %s", li->plen,
2361 if (fa->fa_tos) 2429 rtn_scope(buf1, sizeof(buf1),
2362 seq_printf(seq, "tos =%d\n", 2430 fa->fa_scope),
2363 fa->fa_tos); 2431 rtn_type(buf2, sizeof(buf2),
2364 seq_putc(seq, '\n'); 2432 fa->fa_type));
2365 } 2433 if (fa->fa_tos)
2434 seq_printf(seq, "tos =%d\n",
2435 fa->fa_tos);
2436 seq_putc(seq, '\n');
2366 } 2437 }
2367 } 2438 }
2368 } 2439 }
@@ -2379,8 +2450,8 @@ static const struct seq_operations fib_trie_seq_ops = {
2379 2450
2380static int fib_trie_seq_open(struct inode *inode, struct file *file) 2451static int fib_trie_seq_open(struct inode *inode, struct file *file)
2381{ 2452{
2382 return seq_open_private(file, &fib_trie_seq_ops, 2453 return seq_open_net(inode, file, &fib_trie_seq_ops,
2383 sizeof(struct fib_trie_iter)); 2454 sizeof(struct fib_trie_iter));
2384} 2455}
2385 2456
2386static const struct file_operations fib_trie_fops = { 2457static const struct file_operations fib_trie_fops = {
@@ -2388,7 +2459,7 @@ static const struct file_operations fib_trie_fops = {
2388 .open = fib_trie_seq_open, 2459 .open = fib_trie_seq_open,
2389 .read = seq_read, 2460 .read = seq_read,
2390 .llseek = seq_lseek, 2461 .llseek = seq_lseek,
2391 .release = seq_release_private, 2462 .release = seq_release_net,
2392}; 2463};
2393 2464
2394static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2465static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
@@ -2416,8 +2487,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2416{ 2487{
2417 const struct fib_trie_iter *iter = seq->private; 2488 const struct fib_trie_iter *iter = seq->private;
2418 struct leaf *l = v; 2489 struct leaf *l = v;
2419 int i; 2490 struct leaf_info *li;
2420 char bf[128]; 2491 struct hlist_node *node;
2421 2492
2422 if (v == SEQ_START_TOKEN) { 2493 if (v == SEQ_START_TOKEN) {
2423 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2494 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2426,25 +2497,23 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2426 return 0; 2497 return 0;
2427 } 2498 }
2428 2499
2429 if (iter->trie == trie_local) 2500 if (iter->trie == iter->trie_local)
2430 return 0; 2501 return 0;
2502
2431 if (IS_TNODE(l)) 2503 if (IS_TNODE(l))
2432 return 0; 2504 return 0;
2433 2505
2434 for (i=32; i>=0; i--) { 2506 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2435 struct leaf_info *li = find_leaf_info(l, i);
2436 struct fib_alias *fa; 2507 struct fib_alias *fa;
2437 __be32 mask, prefix; 2508 __be32 mask, prefix;
2438 2509
2439 if (!li)
2440 continue;
2441
2442 mask = inet_make_mask(li->plen); 2510 mask = inet_make_mask(li->plen);
2443 prefix = htonl(l->key); 2511 prefix = htonl(l->key);
2444 2512
2445 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2513 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2446 const struct fib_info *fi = fa->fa_info; 2514 const struct fib_info *fi = fa->fa_info;
2447 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2515 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2516 char bf[128];
2448 2517
2449 if (fa->fa_type == RTN_BROADCAST 2518 if (fa->fa_type == RTN_BROADCAST
2450 || fa->fa_type == RTN_MULTICAST) 2519 || fa->fa_type == RTN_MULTICAST)
@@ -2458,7 +2527,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2458 fi->fib_nh->nh_gw, flags, 0, 0, 2527 fi->fib_nh->nh_gw, flags, 0, 0,
2459 fi->fib_priority, 2528 fi->fib_priority,
2460 mask, 2529 mask,
2461 (fi->fib_advmss ? fi->fib_advmss + 40 : 0), 2530 (fi->fib_advmss ?
2531 fi->fib_advmss + 40 : 0),
2462 fi->fib_window, 2532 fi->fib_window,
2463 fi->fib_rtt >> 3); 2533 fi->fib_rtt >> 3);
2464 else 2534 else
@@ -2483,8 +2553,8 @@ static const struct seq_operations fib_route_seq_ops = {
2483 2553
2484static int fib_route_seq_open(struct inode *inode, struct file *file) 2554static int fib_route_seq_open(struct inode *inode, struct file *file)
2485{ 2555{
2486 return seq_open_private(file, &fib_route_seq_ops, 2556 return seq_open_net(inode, file, &fib_route_seq_ops,
2487 sizeof(struct fib_trie_iter)); 2557 sizeof(struct fib_trie_iter));
2488} 2558}
2489 2559
2490static const struct file_operations fib_route_fops = { 2560static const struct file_operations fib_route_fops = {
@@ -2492,35 +2562,36 @@ static const struct file_operations fib_route_fops = {
2492 .open = fib_route_seq_open, 2562 .open = fib_route_seq_open,
2493 .read = seq_read, 2563 .read = seq_read,
2494 .llseek = seq_lseek, 2564 .llseek = seq_lseek,
2495 .release = seq_release_private, 2565 .release = seq_release_net,
2496}; 2566};
2497 2567
2498int __init fib_proc_init(void) 2568int __net_init fib_proc_init(struct net *net)
2499{ 2569{
2500 if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2570 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2501 goto out1; 2571 goto out1;
2502 2572
2503 if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) 2573 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2574 &fib_triestat_fops))
2504 goto out2; 2575 goto out2;
2505 2576
2506 if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops)) 2577 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2507 goto out3; 2578 goto out3;
2508 2579
2509 return 0; 2580 return 0;
2510 2581
2511out3: 2582out3:
2512 proc_net_remove(&init_net, "fib_triestat"); 2583 proc_net_remove(net, "fib_triestat");
2513out2: 2584out2:
2514 proc_net_remove(&init_net, "fib_trie"); 2585 proc_net_remove(net, "fib_trie");
2515out1: 2586out1:
2516 return -ENOMEM; 2587 return -ENOMEM;
2517} 2588}
2518 2589
2519void __init fib_proc_exit(void) 2590void __net_exit fib_proc_exit(struct net *net)
2520{ 2591{
2521 proc_net_remove(&init_net, "fib_trie"); 2592 proc_net_remove(net, "fib_trie");
2522 proc_net_remove(&init_net, "fib_triestat"); 2593 proc_net_remove(net, "fib_triestat");
2523 proc_net_remove(&init_net, "route"); 2594 proc_net_remove(net, "route");
2524} 2595}
2525 2596
2526#endif /* CONFIG_PROC_FS */ 2597#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 233de0634298..a7321a82df6d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -92,6 +92,7 @@
92#include <asm/system.h> 92#include <asm/system.h>
93#include <asm/uaccess.h> 93#include <asm/uaccess.h>
94#include <net/checksum.h> 94#include <net/checksum.h>
95#include <net/xfrm.h>
95 96
96/* 97/*
97 * Build xmit assembly blocks 98 * Build xmit assembly blocks
@@ -231,7 +232,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
231static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; 232static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
232#define icmp_socket __get_cpu_var(__icmp_socket) 233#define icmp_socket __get_cpu_var(__icmp_socket)
233 234
234static __inline__ int icmp_xmit_lock(void) 235static inline int icmp_xmit_lock(void)
235{ 236{
236 local_bh_disable(); 237 local_bh_disable();
237 238
@@ -245,7 +246,7 @@ static __inline__ int icmp_xmit_lock(void)
245 return 0; 246 return 0;
246} 247}
247 248
248static void icmp_xmit_unlock(void) 249static inline void icmp_xmit_unlock(void)
249{ 250{
250 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); 251 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
251} 252}
@@ -274,18 +275,19 @@ static void icmp_xmit_unlock(void)
274#define XRLIM_BURST_FACTOR 6 275#define XRLIM_BURST_FACTOR 6
275int xrlim_allow(struct dst_entry *dst, int timeout) 276int xrlim_allow(struct dst_entry *dst, int timeout)
276{ 277{
277 unsigned long now; 278 unsigned long now, token = dst->rate_tokens;
278 int rc = 0; 279 int rc = 0;
279 280
280 now = jiffies; 281 now = jiffies;
281 dst->rate_tokens += now - dst->rate_last; 282 token += now - dst->rate_last;
282 dst->rate_last = now; 283 dst->rate_last = now;
283 if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) 284 if (token > XRLIM_BURST_FACTOR * timeout)
284 dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; 285 token = XRLIM_BURST_FACTOR * timeout;
285 if (dst->rate_tokens >= timeout) { 286 if (token >= timeout) {
286 dst->rate_tokens -= timeout; 287 token -= timeout;
287 rc = 1; 288 rc = 1;
288 } 289 }
290 dst->rate_tokens = token;
289 return rc; 291 return rc;
290} 292}
291 293
@@ -403,7 +405,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
403 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 405 .tos = RT_TOS(ip_hdr(skb)->tos) } },
404 .proto = IPPROTO_ICMP }; 406 .proto = IPPROTO_ICMP };
405 security_skb_classify_flow(skb, &fl); 407 security_skb_classify_flow(skb, &fl);
406 if (ip_route_output_key(&rt, &fl)) 408 if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl))
407 goto out_unlock; 409 goto out_unlock;
408 } 410 }
409 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 411 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
@@ -435,9 +437,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
435 struct ipcm_cookie ipc; 437 struct ipcm_cookie ipc;
436 __be32 saddr; 438 __be32 saddr;
437 u8 tos; 439 u8 tos;
440 struct net *net;
438 441
439 if (!rt) 442 if (!rt)
440 goto out; 443 goto out;
444 net = rt->u.dst.dev->nd_net;
441 445
442 /* 446 /*
443 * Find the original header. It is expected to be valid, of course. 447 * Find the original header. It is expected to be valid, of course.
@@ -513,7 +517,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
513 struct net_device *dev = NULL; 517 struct net_device *dev = NULL;
514 518
515 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) 519 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
516 dev = dev_get_by_index(&init_net, rt->fl.iif); 520 dev = dev_get_by_index(net, rt->fl.iif);
517 521
518 if (dev) { 522 if (dev) {
519 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); 523 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -540,7 +544,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
540 icmp_param.data.icmph.checksum = 0; 544 icmp_param.data.icmph.checksum = 0;
541 icmp_param.skb = skb_in; 545 icmp_param.skb = skb_in;
542 icmp_param.offset = skb_network_offset(skb_in); 546 icmp_param.offset = skb_network_offset(skb_in);
543 icmp_out_count(icmp_param.data.icmph.type);
544 inet_sk(icmp_socket->sk)->tos = tos; 547 inet_sk(icmp_socket->sk)->tos = tos;
545 ipc.addr = iph->saddr; 548 ipc.addr = iph->saddr;
546 ipc.opt = &icmp_param.replyopts; 549 ipc.opt = &icmp_param.replyopts;
@@ -564,11 +567,71 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
564 } 567 }
565 } 568 }
566 }; 569 };
570 int err;
571 struct rtable *rt2;
572
567 security_skb_classify_flow(skb_in, &fl); 573 security_skb_classify_flow(skb_in, &fl);
568 if (ip_route_output_key(&rt, &fl)) 574 if (__ip_route_output_key(net, &rt, &fl))
575 goto out_unlock;
576
577 /* No need to clone since we're just using its address. */
578 rt2 = rt;
579
580 err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
581 switch (err) {
582 case 0:
583 if (rt != rt2)
584 goto route_done;
585 break;
586 case -EPERM:
587 rt = NULL;
588 break;
589 default:
590 goto out_unlock;
591 }
592
593 if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
594 goto out_unlock;
595
596 if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
597 err = __ip_route_output_key(net, &rt2, &fl);
598 else {
599 struct flowi fl2 = {};
600 struct dst_entry *odst;
601
602 fl2.fl4_dst = fl.fl4_src;
603 if (ip_route_output_key(net, &rt2, &fl2))
604 goto out_unlock;
605
606 /* Ugh! */
607 odst = skb_in->dst;
608 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
609 RT_TOS(tos), rt2->u.dst.dev);
610
611 dst_release(&rt2->u.dst);
612 rt2 = (struct rtable *)skb_in->dst;
613 skb_in->dst = odst;
614 }
615
616 if (err)
617 goto out_unlock;
618
619 err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
620 XFRM_LOOKUP_ICMP);
621 if (err == -ENOENT) {
622 if (!rt)
623 goto out_unlock;
624 goto route_done;
625 }
626
627 dst_release(&rt->u.dst);
628 rt = rt2;
629
630 if (err)
569 goto out_unlock; 631 goto out_unlock;
570 } 632 }
571 633
634route_done:
572 if (!icmpv4_xrlim_allow(rt, type, code)) 635 if (!icmpv4_xrlim_allow(rt, type, code))
573 goto ende; 636 goto ende;
574 637
@@ -604,8 +667,10 @@ static void icmp_unreach(struct sk_buff *skb)
604 struct icmphdr *icmph; 667 struct icmphdr *icmph;
605 int hash, protocol; 668 int hash, protocol;
606 struct net_protocol *ipprot; 669 struct net_protocol *ipprot;
607 struct sock *raw_sk;
608 u32 info = 0; 670 u32 info = 0;
671 struct net *net;
672
673 net = skb->dst->dev->nd_net;
609 674
610 /* 675 /*
611 * Incomplete header ? 676 * Incomplete header ?
@@ -636,7 +701,7 @@ static void icmp_unreach(struct sk_buff *skb)
636 "and DF set.\n", 701 "and DF set.\n",
637 NIPQUAD(iph->daddr)); 702 NIPQUAD(iph->daddr));
638 } else { 703 } else {
639 info = ip_rt_frag_needed(iph, 704 info = ip_rt_frag_needed(net, iph,
640 ntohs(icmph->un.frag.mtu)); 705 ntohs(icmph->un.frag.mtu));
641 if (!info) 706 if (!info)
642 goto out; 707 goto out;
@@ -674,7 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
674 */ 739 */
675 740
676 if (!sysctl_icmp_ignore_bogus_error_responses && 741 if (!sysctl_icmp_ignore_bogus_error_responses &&
677 inet_addr_type(iph->daddr) == RTN_BROADCAST) { 742 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
678 if (net_ratelimit()) 743 if (net_ratelimit())
679 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " 744 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
680 "type %u, code %u " 745 "type %u, code %u "
@@ -698,21 +763,9 @@ static void icmp_unreach(struct sk_buff *skb)
698 /* 763 /*
699 * Deliver ICMP message to raw sockets. Pretty useless feature? 764 * Deliver ICMP message to raw sockets. Pretty useless feature?
700 */ 765 */
766 raw_icmp_error(skb, protocol, info);
701 767
702 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
703 hash = protocol & (MAX_INET_PROTOS - 1); 768 hash = protocol & (MAX_INET_PROTOS - 1);
704 read_lock(&raw_v4_lock);
705 if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
706 while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
707 iph->saddr,
708 skb->dev->ifindex)) != NULL) {
709 raw_err(raw_sk, skb, info);
710 raw_sk = sk_next(raw_sk);
711 iph = (struct iphdr *)skb->data;
712 }
713 }
714 read_unlock(&raw_v4_lock);
715
716 rcu_read_lock(); 769 rcu_read_lock();
717 ipprot = rcu_dereference(inet_protos[hash]); 770 ipprot = rcu_dereference(inet_protos[hash]);
718 if (ipprot && ipprot->err_handler) 771 if (ipprot && ipprot->err_handler)
@@ -930,6 +983,25 @@ int icmp_rcv(struct sk_buff *skb)
930 struct icmphdr *icmph; 983 struct icmphdr *icmph;
931 struct rtable *rt = (struct rtable *)skb->dst; 984 struct rtable *rt = (struct rtable *)skb->dst;
932 985
986 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
987 int nh;
988
989 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
990 XFRM_STATE_ICMP))
991 goto drop;
992
993 if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
994 goto drop;
995
996 nh = skb_network_offset(skb);
997 skb_set_network_header(skb, sizeof(*icmph));
998
999 if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
1000 goto drop;
1001
1002 skb_set_network_header(skb, nh);
1003 }
1004
933 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); 1005 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
934 1006
935 switch (skb->ip_summed) { 1007 switch (skb->ip_summed) {
@@ -943,8 +1015,7 @@ int icmp_rcv(struct sk_buff *skb)
943 goto error; 1015 goto error;
944 } 1016 }
945 1017
946 if (!pskb_pull(skb, sizeof(struct icmphdr))) 1018 __skb_pull(skb, sizeof(*icmph));
947 goto error;
948 1019
949 icmph = icmp_hdr(skb); 1020 icmph = icmp_hdr(skb);
950 1021
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7dbc282d4f9f..994648be80ab 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
130 */ 130 */
131 131
132#define IGMP_V1_SEEN(in_dev) \ 132#define IGMP_V1_SEEN(in_dev) \
133 (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ 133 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ 134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
135 ((in_dev)->mr_v1_seen && \ 135 ((in_dev)->mr_v1_seen && \
136 time_before(jiffies, (in_dev)->mr_v1_seen))) 136 time_before(jiffies, (in_dev)->mr_v1_seen)))
137#define IGMP_V2_SEEN(in_dev) \ 137#define IGMP_V2_SEEN(in_dev) \
138 (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ 138 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ 139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
140 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
141 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -301,7 +301,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
301 .nl_u = { .ip4_u = { 301 .nl_u = { .ip4_u = {
302 .daddr = IGMPV3_ALL_MCR } }, 302 .daddr = IGMPV3_ALL_MCR } },
303 .proto = IPPROTO_IGMP }; 303 .proto = IPPROTO_IGMP };
304 if (ip_route_output_key(&rt, &fl)) { 304 if (ip_route_output_key(&init_net, &rt, &fl)) {
305 kfree_skb(skb); 305 kfree_skb(skb);
306 return NULL; 306 return NULL;
307 } 307 }
@@ -349,17 +349,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
349 349
350static int igmpv3_sendpack(struct sk_buff *skb) 350static int igmpv3_sendpack(struct sk_buff *skb)
351{ 351{
352 struct iphdr *pip = ip_hdr(skb);
353 struct igmphdr *pig = igmp_hdr(skb); 352 struct igmphdr *pig = igmp_hdr(skb);
354 const int iplen = skb->tail - skb->network_header;
355 const int igmplen = skb->tail - skb->transport_header; 353 const int igmplen = skb->tail - skb->transport_header;
356 354
357 pip->tot_len = htons(iplen);
358 ip_send_check(pip);
359 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); 355 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
360 356
361 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, 357 return ip_local_out(skb);
362 dst_output);
363} 358}
364 359
365static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) 360static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -650,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
650 struct flowi fl = { .oif = dev->ifindex, 645 struct flowi fl = { .oif = dev->ifindex,
651 .nl_u = { .ip4_u = { .daddr = dst } }, 646 .nl_u = { .ip4_u = { .daddr = dst } },
652 .proto = IPPROTO_IGMP }; 647 .proto = IPPROTO_IGMP };
653 if (ip_route_output_key(&rt, &fl)) 648 if (ip_route_output_key(&init_net, &rt, &fl))
654 return -1; 649 return -1;
655 } 650 }
656 if (rt->rt_src == 0) { 651 if (rt->rt_src == 0) {
@@ -680,13 +675,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
680 iph->daddr = dst; 675 iph->daddr = dst;
681 iph->saddr = rt->rt_src; 676 iph->saddr = rt->rt_src;
682 iph->protocol = IPPROTO_IGMP; 677 iph->protocol = IPPROTO_IGMP;
683 iph->tot_len = htons(IGMP_SIZE);
684 ip_select_ident(iph, &rt->u.dst, NULL); 678 ip_select_ident(iph, &rt->u.dst, NULL);
685 ((u8*)&iph[1])[0] = IPOPT_RA; 679 ((u8*)&iph[1])[0] = IPOPT_RA;
686 ((u8*)&iph[1])[1] = 4; 680 ((u8*)&iph[1])[1] = 4;
687 ((u8*)&iph[1])[2] = 0; 681 ((u8*)&iph[1])[2] = 0;
688 ((u8*)&iph[1])[3] = 0; 682 ((u8*)&iph[1])[3] = 0;
689 ip_send_check(iph);
690 683
691 ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 684 ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
692 ih->type=type; 685 ih->type=type;
@@ -695,8 +688,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
695 ih->group=group; 688 ih->group=group;
696 ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); 689 ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
697 690
698 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 691 return ip_local_out(skb);
699 dst_output);
700} 692}
701 693
702static void igmp_gq_timer_expire(unsigned long data) 694static void igmp_gq_timer_expire(unsigned long data)
@@ -1234,9 +1226,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1234 spin_lock_init(&im->lock); 1226 spin_lock_init(&im->lock);
1235#ifdef CONFIG_IP_MULTICAST 1227#ifdef CONFIG_IP_MULTICAST
1236 im->tm_running=0; 1228 im->tm_running=0;
1237 init_timer(&im->timer); 1229 setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
1238 im->timer.data=(unsigned long)im;
1239 im->timer.function=&igmp_timer_expire;
1240 im->unsolicit_count = IGMP_Unsolicited_Report_Count; 1230 im->unsolicit_count = IGMP_Unsolicited_Report_Count;
1241 im->reporter = 0; 1231 im->reporter = 0;
1242 im->gsquery = 0; 1232 im->gsquery = 0;
@@ -1338,13 +1328,11 @@ void ip_mc_init_dev(struct in_device *in_dev)
1338 in_dev->mc_tomb = NULL; 1328 in_dev->mc_tomb = NULL;
1339#ifdef CONFIG_IP_MULTICAST 1329#ifdef CONFIG_IP_MULTICAST
1340 in_dev->mr_gq_running = 0; 1330 in_dev->mr_gq_running = 0;
1341 init_timer(&in_dev->mr_gq_timer); 1331 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
1342 in_dev->mr_gq_timer.data=(unsigned long) in_dev; 1332 (unsigned long)in_dev);
1343 in_dev->mr_gq_timer.function=&igmp_gq_timer_expire;
1344 in_dev->mr_ifc_count = 0; 1333 in_dev->mr_ifc_count = 0;
1345 init_timer(&in_dev->mr_ifc_timer); 1334 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1346 in_dev->mr_ifc_timer.data=(unsigned long) in_dev; 1335 (unsigned long)in_dev);
1347 in_dev->mr_ifc_timer.function=&igmp_ifc_timer_expire;
1348 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; 1336 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
1349#endif 1337#endif
1350 1338
@@ -1401,19 +1389,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
1401 struct in_device *idev = NULL; 1389 struct in_device *idev = NULL;
1402 1390
1403 if (imr->imr_ifindex) { 1391 if (imr->imr_ifindex) {
1404 idev = inetdev_by_index(imr->imr_ifindex); 1392 idev = inetdev_by_index(&init_net, imr->imr_ifindex);
1405 if (idev) 1393 if (idev)
1406 __in_dev_put(idev); 1394 __in_dev_put(idev);
1407 return idev; 1395 return idev;
1408 } 1396 }
1409 if (imr->imr_address.s_addr) { 1397 if (imr->imr_address.s_addr) {
1410 dev = ip_dev_find(imr->imr_address.s_addr); 1398 dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
1411 if (!dev) 1399 if (!dev)
1412 return NULL; 1400 return NULL;
1413 dev_put(dev); 1401 dev_put(dev);
1414 } 1402 }
1415 1403
1416 if (!dev && !ip_route_output_key(&rt, &fl)) { 1404 if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) {
1417 dev = rt->u.dst.dev; 1405 dev = rt->u.dst.dev;
1418 ip_rt_put(rt); 1406 ip_rt_put(rt);
1419 } 1407 }
@@ -1754,7 +1742,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1754 int ifindex; 1742 int ifindex;
1755 int count = 0; 1743 int count = 0;
1756 1744
1757 if (!MULTICAST(addr)) 1745 if (!ipv4_is_multicast(addr))
1758 return -EINVAL; 1746 return -EINVAL;
1759 1747
1760 rtnl_lock(); 1748 rtnl_lock();
@@ -1867,7 +1855,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1867 int leavegroup = 0; 1855 int leavegroup = 0;
1868 int i, j, rv; 1856 int i, j, rv;
1869 1857
1870 if (!MULTICAST(addr)) 1858 if (!ipv4_is_multicast(addr))
1871 return -EINVAL; 1859 return -EINVAL;
1872 1860
1873 rtnl_lock(); 1861 rtnl_lock();
@@ -1997,7 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1997 struct ip_sf_socklist *newpsl, *psl; 1985 struct ip_sf_socklist *newpsl, *psl;
1998 int leavegroup = 0; 1986 int leavegroup = 0;
1999 1987
2000 if (!MULTICAST(addr)) 1988 if (!ipv4_is_multicast(addr))
2001 return -EINVAL; 1989 return -EINVAL;
2002 if (msf->imsf_fmode != MCAST_INCLUDE && 1990 if (msf->imsf_fmode != MCAST_INCLUDE &&
2003 msf->imsf_fmode != MCAST_EXCLUDE) 1991 msf->imsf_fmode != MCAST_EXCLUDE)
@@ -2080,7 +2068,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2080 struct inet_sock *inet = inet_sk(sk); 2068 struct inet_sock *inet = inet_sk(sk);
2081 struct ip_sf_socklist *psl; 2069 struct ip_sf_socklist *psl;
2082 2070
2083 if (!MULTICAST(addr)) 2071 if (!ipv4_is_multicast(addr))
2084 return -EINVAL; 2072 return -EINVAL;
2085 2073
2086 rtnl_lock(); 2074 rtnl_lock();
@@ -2142,7 +2130,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2142 if (psin->sin_family != AF_INET) 2130 if (psin->sin_family != AF_INET)
2143 return -EINVAL; 2131 return -EINVAL;
2144 addr = psin->sin_addr.s_addr; 2132 addr = psin->sin_addr.s_addr;
2145 if (!MULTICAST(addr)) 2133 if (!ipv4_is_multicast(addr))
2146 return -EINVAL; 2134 return -EINVAL;
2147 2135
2148 rtnl_lock(); 2136 rtnl_lock();
@@ -2192,7 +2180,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2192 struct ip_sf_socklist *psl; 2180 struct ip_sf_socklist *psl;
2193 int i; 2181 int i;
2194 2182
2195 if (!MULTICAST(loc_addr)) 2183 if (!ipv4_is_multicast(loc_addr))
2196 return 1; 2184 return 1;
2197 2185
2198 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2186 for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
@@ -2234,7 +2222,7 @@ void ip_mc_drop_socket(struct sock *sk)
2234 struct in_device *in_dev; 2222 struct in_device *in_dev;
2235 inet->mc_list = iml->next; 2223 inet->mc_list = iml->next;
2236 2224
2237 in_dev = inetdev_by_index(iml->multi.imr_ifindex); 2225 in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
2238 (void) ip_mc_leave_src(sk, iml, in_dev); 2226 (void) ip_mc_leave_src(sk, iml, in_dev);
2239 if (in_dev != NULL) { 2227 if (in_dev != NULL) {
2240 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2228 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
@@ -2341,6 +2329,7 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos)
2341} 2329}
2342 2330
2343static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) 2331static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
2332 __acquires(dev_base_lock)
2344{ 2333{
2345 read_lock(&dev_base_lock); 2334 read_lock(&dev_base_lock);
2346 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2335 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2358,6 +2347,7 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2358} 2347}
2359 2348
2360static void igmp_mc_seq_stop(struct seq_file *seq, void *v) 2349static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
2350 __releases(dev_base_lock)
2361{ 2351{
2362 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2352 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2363 if (likely(state->in_dev != NULL)) { 2353 if (likely(state->in_dev != NULL)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8fb6ca23700a..de5a41de191a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
87 struct hlist_node *node; 87 struct hlist_node *node;
88 struct inet_bind_bucket *tb; 88 struct inet_bind_bucket *tb;
89 int ret; 89 int ret;
90 struct net *net = sk->sk_net;
90 91
91 local_bh_disable(); 92 local_bh_disable();
92 if (!snum) { 93 if (!snum) {
@@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
100 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; 101 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
101 spin_lock(&head->lock); 102 spin_lock(&head->lock);
102 inet_bind_bucket_for_each(tb, node, &head->chain) 103 inet_bind_bucket_for_each(tb, node, &head->chain)
103 if (tb->port == rover) 104 if (tb->ib_net == net && tb->port == rover)
104 goto next; 105 goto next;
105 break; 106 break;
106 next: 107 next:
@@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
127 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; 128 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
128 spin_lock(&head->lock); 129 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain) 130 inet_bind_bucket_for_each(tb, node, &head->chain)
130 if (tb->port == snum) 131 if (tb->ib_net == net && tb->port == snum)
131 goto tb_found; 132 goto tb_found;
132 } 133 }
133 tb = NULL; 134 tb = NULL;
@@ -147,7 +148,8 @@ tb_found:
147 } 148 }
148tb_not_found: 149tb_not_found:
149 ret = 1; 150 ret = 1;
150 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) 151 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
152 net, head, snum)) == NULL)
151 goto fail_unlock; 153 goto fail_unlock;
152 if (hlist_empty(&tb->owners)) { 154 if (hlist_empty(&tb->owners)) {
153 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 155 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -277,18 +279,11 @@ void inet_csk_init_xmit_timers(struct sock *sk,
277{ 279{
278 struct inet_connection_sock *icsk = inet_csk(sk); 280 struct inet_connection_sock *icsk = inet_csk(sk);
279 281
280 init_timer(&icsk->icsk_retransmit_timer); 282 setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
281 init_timer(&icsk->icsk_delack_timer); 283 (unsigned long)sk);
282 init_timer(&sk->sk_timer); 284 setup_timer(&icsk->icsk_delack_timer, delack_handler,
283 285 (unsigned long)sk);
284 icsk->icsk_retransmit_timer.function = retransmit_handler; 286 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
285 icsk->icsk_delack_timer.function = delack_handler;
286 sk->sk_timer.function = keepalive_handler;
287
288 icsk->icsk_retransmit_timer.data =
289 icsk->icsk_delack_timer.data =
290 sk->sk_timer.data = (unsigned long)sk;
291
292 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 287 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
293} 288}
294 289
@@ -340,7 +335,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
340 .dport = ireq->rmt_port } } }; 335 .dport = ireq->rmt_port } } };
341 336
342 security_req_classify_flow(req, &fl); 337 security_req_classify_flow(req, &fl);
343 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 338 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
344 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 339 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
345 return NULL; 340 return NULL;
346 } 341 }
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index dc429b6b0ba6..da97695e7096 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -51,6 +51,29 @@ static struct sock *idiagnl;
51#define INET_DIAG_PUT(skb, attrtype, attrlen) \ 51#define INET_DIAG_PUT(skb, attrtype, attrlen) \
52 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) 52 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
53 53
54static DEFINE_MUTEX(inet_diag_table_mutex);
55
56static const struct inet_diag_handler *inet_diag_lock_handler(int type)
57{
58#ifdef CONFIG_KMOD
59 if (!inet_diag_table[type])
60 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
61 NETLINK_INET_DIAG, type);
62#endif
63
64 mutex_lock(&inet_diag_table_mutex);
65 if (!inet_diag_table[type])
66 return ERR_PTR(-ENOENT);
67
68 return inet_diag_table[type];
69}
70
71static inline void inet_diag_unlock_handler(
72 const struct inet_diag_handler *handler)
73{
74 mutex_unlock(&inet_diag_table_mutex);
75}
76
54static int inet_csk_diag_fill(struct sock *sk, 77static int inet_csk_diag_fill(struct sock *sk,
55 struct sk_buff *skb, 78 struct sk_buff *skb,
56 int ext, u32 pid, u32 seq, u16 nlmsg_flags, 79 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
@@ -235,18 +258,23 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
235 struct inet_hashinfo *hashinfo; 258 struct inet_hashinfo *hashinfo;
236 const struct inet_diag_handler *handler; 259 const struct inet_diag_handler *handler;
237 260
238 handler = inet_diag_table[nlh->nlmsg_type]; 261 handler = inet_diag_lock_handler(nlh->nlmsg_type);
239 BUG_ON(handler == NULL); 262 if (IS_ERR(handler)) {
263 err = PTR_ERR(handler);
264 goto unlock;
265 }
266
240 hashinfo = handler->idiag_hashinfo; 267 hashinfo = handler->idiag_hashinfo;
268 err = -EINVAL;
241 269
242 if (req->idiag_family == AF_INET) { 270 if (req->idiag_family == AF_INET) {
243 sk = inet_lookup(hashinfo, req->id.idiag_dst[0], 271 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
244 req->id.idiag_dport, req->id.idiag_src[0], 272 req->id.idiag_dport, req->id.idiag_src[0],
245 req->id.idiag_sport, req->id.idiag_if); 273 req->id.idiag_sport, req->id.idiag_if);
246 } 274 }
247#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 275#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
248 else if (req->idiag_family == AF_INET6) { 276 else if (req->idiag_family == AF_INET6) {
249 sk = inet6_lookup(hashinfo, 277 sk = inet6_lookup(&init_net, hashinfo,
250 (struct in6_addr *)req->id.idiag_dst, 278 (struct in6_addr *)req->id.idiag_dst,
251 req->id.idiag_dport, 279 req->id.idiag_dport,
252 (struct in6_addr *)req->id.idiag_src, 280 (struct in6_addr *)req->id.idiag_src,
@@ -255,11 +283,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
255 } 283 }
256#endif 284#endif
257 else { 285 else {
258 return -EINVAL; 286 goto unlock;
259 } 287 }
260 288
289 err = -ENOENT;
261 if (sk == NULL) 290 if (sk == NULL)
262 return -ENOENT; 291 goto unlock;
263 292
264 err = -ESTALE; 293 err = -ESTALE;
265 if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || 294 if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
@@ -296,6 +325,8 @@ out:
296 else 325 else
297 sock_put(sk); 326 sock_put(sk);
298 } 327 }
328unlock:
329 inet_diag_unlock_handler(handler);
299 return err; 330 return err;
300} 331}
301 332
@@ -678,8 +709,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
678 const struct inet_diag_handler *handler; 709 const struct inet_diag_handler *handler;
679 struct inet_hashinfo *hashinfo; 710 struct inet_hashinfo *hashinfo;
680 711
681 handler = inet_diag_table[cb->nlh->nlmsg_type]; 712 handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
682 BUG_ON(handler == NULL); 713 if (IS_ERR(handler))
714 goto unlock;
715
683 hashinfo = handler->idiag_hashinfo; 716 hashinfo = handler->idiag_hashinfo;
684 717
685 s_i = cb->args[1]; 718 s_i = cb->args[1];
@@ -743,17 +776,18 @@ skip_listen_ht:
743 } 776 }
744 777
745 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 778 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
746 return skb->len; 779 goto unlock;
747 780
748 for (i = s_i; i < hashinfo->ehash_size; i++) { 781 for (i = s_i; i < hashinfo->ehash_size; i++) {
749 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 782 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
783 rwlock_t *lock = inet_ehash_lockp(hashinfo, i);
750 struct sock *sk; 784 struct sock *sk;
751 struct hlist_node *node; 785 struct hlist_node *node;
752 786
753 if (i > s_i) 787 if (i > s_i)
754 s_num = 0; 788 s_num = 0;
755 789
756 read_lock_bh(&head->lock); 790 read_lock_bh(lock);
757 num = 0; 791 num = 0;
758 sk_for_each(sk, node, &head->chain) { 792 sk_for_each(sk, node, &head->chain) {
759 struct inet_sock *inet = inet_sk(sk); 793 struct inet_sock *inet = inet_sk(sk);
@@ -769,7 +803,7 @@ skip_listen_ht:
769 r->id.idiag_dport) 803 r->id.idiag_dport)
770 goto next_normal; 804 goto next_normal;
771 if (inet_csk_diag_dump(sk, skb, cb) < 0) { 805 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
772 read_unlock_bh(&head->lock); 806 read_unlock_bh(lock);
773 goto done; 807 goto done;
774 } 808 }
775next_normal: 809next_normal:
@@ -791,19 +825,21 @@ next_normal:
791 r->id.idiag_dport) 825 r->id.idiag_dport)
792 goto next_dying; 826 goto next_dying;
793 if (inet_twsk_diag_dump(tw, skb, cb) < 0) { 827 if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
794 read_unlock_bh(&head->lock); 828 read_unlock_bh(lock);
795 goto done; 829 goto done;
796 } 830 }
797next_dying: 831next_dying:
798 ++num; 832 ++num;
799 } 833 }
800 } 834 }
801 read_unlock_bh(&head->lock); 835 read_unlock_bh(lock);
802 } 836 }
803 837
804done: 838done:
805 cb->args[1] = i; 839 cb->args[1] = i;
806 cb->args[2] = num; 840 cb->args[2] = num;
841unlock:
842 inet_diag_unlock_handler(handler);
807 return skb->len; 843 return skb->len;
808} 844}
809 845
@@ -815,15 +851,6 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
815 nlmsg_len(nlh) < hdrlen) 851 nlmsg_len(nlh) < hdrlen)
816 return -EINVAL; 852 return -EINVAL;
817 853
818#ifdef CONFIG_KMOD
819 if (inet_diag_table[nlh->nlmsg_type] == NULL)
820 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
821 NETLINK_INET_DIAG, nlh->nlmsg_type);
822#endif
823
824 if (inet_diag_table[nlh->nlmsg_type] == NULL)
825 return -ENOENT;
826
827 if (nlh->nlmsg_flags & NLM_F_DUMP) { 854 if (nlh->nlmsg_flags & NLM_F_DUMP) {
828 if (nlmsg_attrlen(nlh, hdrlen)) { 855 if (nlmsg_attrlen(nlh, hdrlen)) {
829 struct nlattr *attr; 856 struct nlattr *attr;
@@ -852,8 +879,6 @@ static void inet_diag_rcv(struct sk_buff *skb)
852 mutex_unlock(&inet_diag_mutex); 879 mutex_unlock(&inet_diag_mutex);
853} 880}
854 881
855static DEFINE_SPINLOCK(inet_diag_register_lock);
856
857int inet_diag_register(const struct inet_diag_handler *h) 882int inet_diag_register(const struct inet_diag_handler *h)
858{ 883{
859 const __u16 type = h->idiag_type; 884 const __u16 type = h->idiag_type;
@@ -862,13 +887,13 @@ int inet_diag_register(const struct inet_diag_handler *h)
862 if (type >= INET_DIAG_GETSOCK_MAX) 887 if (type >= INET_DIAG_GETSOCK_MAX)
863 goto out; 888 goto out;
864 889
865 spin_lock(&inet_diag_register_lock); 890 mutex_lock(&inet_diag_table_mutex);
866 err = -EEXIST; 891 err = -EEXIST;
867 if (inet_diag_table[type] == NULL) { 892 if (inet_diag_table[type] == NULL) {
868 inet_diag_table[type] = h; 893 inet_diag_table[type] = h;
869 err = 0; 894 err = 0;
870 } 895 }
871 spin_unlock(&inet_diag_register_lock); 896 mutex_unlock(&inet_diag_table_mutex);
872out: 897out:
873 return err; 898 return err;
874} 899}
@@ -881,11 +906,9 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
881 if (type >= INET_DIAG_GETSOCK_MAX) 906 if (type >= INET_DIAG_GETSOCK_MAX)
882 return; 907 return;
883 908
884 spin_lock(&inet_diag_register_lock); 909 mutex_lock(&inet_diag_table_mutex);
885 inet_diag_table[type] = NULL; 910 inet_diag_table[type] = NULL;
886 spin_unlock(&inet_diag_register_lock); 911 mutex_unlock(&inet_diag_table_mutex);
887
888 synchronize_rcu();
889} 912}
890EXPORT_SYMBOL_GPL(inet_diag_unregister); 913EXPORT_SYMBOL_GPL(inet_diag_unregister);
891 914
@@ -913,7 +936,7 @@ out_free_table:
913 936
914static void __exit inet_diag_exit(void) 937static void __exit inet_diag_exit(void)
915{ 938{
916 sock_release(idiagnl->sk_socket); 939 netlink_kernel_release(idiagnl);
917 kfree(inet_diag_table); 940 kfree(inet_diag_table);
918} 941}
919 942
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e15e04fc6661..724d69aed031 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -47,7 +47,7 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
47 } 47 }
48 write_unlock(&f->lock); 48 write_unlock(&f->lock);
49 49
50 mod_timer(&f->secret_timer, now + f->ctl->secret_interval); 50 mod_timer(&f->secret_timer, now + f->secret_interval);
51} 51}
52 52
53void inet_frags_init(struct inet_frags *f) 53void inet_frags_init(struct inet_frags *f)
@@ -57,35 +57,45 @@ void inet_frags_init(struct inet_frags *f)
57 for (i = 0; i < INETFRAGS_HASHSZ; i++) 57 for (i = 0; i < INETFRAGS_HASHSZ; i++)
58 INIT_HLIST_HEAD(&f->hash[i]); 58 INIT_HLIST_HEAD(&f->hash[i]);
59 59
60 INIT_LIST_HEAD(&f->lru_list);
61 rwlock_init(&f->lock); 60 rwlock_init(&f->lock);
62 61
63 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 62 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
64 (jiffies ^ (jiffies >> 6))); 63 (jiffies ^ (jiffies >> 6)));
65 64
66 f->nqueues = 0; 65 setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
67 atomic_set(&f->mem, 0); 66 (unsigned long)f);
68 67 f->secret_timer.expires = jiffies + f->secret_interval;
69 init_timer(&f->secret_timer);
70 f->secret_timer.function = inet_frag_secret_rebuild;
71 f->secret_timer.data = (unsigned long)f;
72 f->secret_timer.expires = jiffies + f->ctl->secret_interval;
73 add_timer(&f->secret_timer); 68 add_timer(&f->secret_timer);
74} 69}
75EXPORT_SYMBOL(inet_frags_init); 70EXPORT_SYMBOL(inet_frags_init);
76 71
72void inet_frags_init_net(struct netns_frags *nf)
73{
74 nf->nqueues = 0;
75 atomic_set(&nf->mem, 0);
76 INIT_LIST_HEAD(&nf->lru_list);
77}
78EXPORT_SYMBOL(inet_frags_init_net);
79
77void inet_frags_fini(struct inet_frags *f) 80void inet_frags_fini(struct inet_frags *f)
78{ 81{
79 del_timer(&f->secret_timer); 82 del_timer(&f->secret_timer);
80} 83}
81EXPORT_SYMBOL(inet_frags_fini); 84EXPORT_SYMBOL(inet_frags_fini);
82 85
86void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
87{
88 nf->low_thresh = 0;
89 inet_frag_evictor(nf, f);
90}
91EXPORT_SYMBOL(inet_frags_exit_net);
92
83static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 93static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
84{ 94{
85 write_lock(&f->lock); 95 write_lock(&f->lock);
86 hlist_del(&fq->list); 96 hlist_del(&fq->list);
87 list_del(&fq->lru_list); 97 list_del(&fq->lru_list);
88 f->nqueues--; 98 fq->net->nqueues--;
89 write_unlock(&f->lock); 99 write_unlock(&f->lock);
90} 100}
91 101
@@ -103,13 +113,13 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
103 113
104EXPORT_SYMBOL(inet_frag_kill); 114EXPORT_SYMBOL(inet_frag_kill);
105 115
106static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, 116static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
107 int *work) 117 struct sk_buff *skb, int *work)
108{ 118{
109 if (work) 119 if (work)
110 *work -= skb->truesize; 120 *work -= skb->truesize;
111 121
112 atomic_sub(skb->truesize, &f->mem); 122 atomic_sub(skb->truesize, &nf->mem);
113 if (f->skb_free) 123 if (f->skb_free)
114 f->skb_free(skb); 124 f->skb_free(skb);
115 kfree_skb(skb); 125 kfree_skb(skb);
@@ -119,22 +129,24 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
119 int *work) 129 int *work)
120{ 130{
121 struct sk_buff *fp; 131 struct sk_buff *fp;
132 struct netns_frags *nf;
122 133
123 BUG_TRAP(q->last_in & COMPLETE); 134 BUG_TRAP(q->last_in & COMPLETE);
124 BUG_TRAP(del_timer(&q->timer) == 0); 135 BUG_TRAP(del_timer(&q->timer) == 0);
125 136
126 /* Release all fragment data. */ 137 /* Release all fragment data. */
127 fp = q->fragments; 138 fp = q->fragments;
139 nf = q->net;
128 while (fp) { 140 while (fp) {
129 struct sk_buff *xp = fp->next; 141 struct sk_buff *xp = fp->next;
130 142
131 frag_kfree_skb(f, fp, work); 143 frag_kfree_skb(nf, f, fp, work);
132 fp = xp; 144 fp = xp;
133 } 145 }
134 146
135 if (work) 147 if (work)
136 *work -= f->qsize; 148 *work -= f->qsize;
137 atomic_sub(f->qsize, &f->mem); 149 atomic_sub(f->qsize, &nf->mem);
138 150
139 if (f->destructor) 151 if (f->destructor)
140 f->destructor(q); 152 f->destructor(q);
@@ -143,20 +155,20 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
143} 155}
144EXPORT_SYMBOL(inet_frag_destroy); 156EXPORT_SYMBOL(inet_frag_destroy);
145 157
146int inet_frag_evictor(struct inet_frags *f) 158int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
147{ 159{
148 struct inet_frag_queue *q; 160 struct inet_frag_queue *q;
149 int work, evicted = 0; 161 int work, evicted = 0;
150 162
151 work = atomic_read(&f->mem) - f->ctl->low_thresh; 163 work = atomic_read(&nf->mem) - nf->low_thresh;
152 while (work > 0) { 164 while (work > 0) {
153 read_lock(&f->lock); 165 read_lock(&f->lock);
154 if (list_empty(&f->lru_list)) { 166 if (list_empty(&nf->lru_list)) {
155 read_unlock(&f->lock); 167 read_unlock(&f->lock);
156 break; 168 break;
157 } 169 }
158 170
159 q = list_first_entry(&f->lru_list, 171 q = list_first_entry(&nf->lru_list,
160 struct inet_frag_queue, lru_list); 172 struct inet_frag_queue, lru_list);
161 atomic_inc(&q->refcnt); 173 atomic_inc(&q->refcnt);
162 read_unlock(&f->lock); 174 read_unlock(&f->lock);
@@ -175,8 +187,9 @@ int inet_frag_evictor(struct inet_frags *f)
175} 187}
176EXPORT_SYMBOL(inet_frag_evictor); 188EXPORT_SYMBOL(inet_frag_evictor);
177 189
178static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, 190static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
179 struct inet_frags *f, unsigned int hash, void *arg) 191 struct inet_frag_queue *qp_in, struct inet_frags *f,
192 unsigned int hash, void *arg)
180{ 193{
181 struct inet_frag_queue *qp; 194 struct inet_frag_queue *qp;
182#ifdef CONFIG_SMP 195#ifdef CONFIG_SMP
@@ -190,7 +203,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
190 * promoted read lock to write lock. 203 * promoted read lock to write lock.
191 */ 204 */
192 hlist_for_each_entry(qp, n, &f->hash[hash], list) { 205 hlist_for_each_entry(qp, n, &f->hash[hash], list) {
193 if (f->match(qp, arg)) { 206 if (qp->net == nf && f->match(qp, arg)) {
194 atomic_inc(&qp->refcnt); 207 atomic_inc(&qp->refcnt);
195 write_unlock(&f->lock); 208 write_unlock(&f->lock);
196 qp_in->last_in |= COMPLETE; 209 qp_in->last_in |= COMPLETE;
@@ -200,18 +213,19 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
200 } 213 }
201#endif 214#endif
202 qp = qp_in; 215 qp = qp_in;
203 if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) 216 if (!mod_timer(&qp->timer, jiffies + nf->timeout))
204 atomic_inc(&qp->refcnt); 217 atomic_inc(&qp->refcnt);
205 218
206 atomic_inc(&qp->refcnt); 219 atomic_inc(&qp->refcnt);
207 hlist_add_head(&qp->list, &f->hash[hash]); 220 hlist_add_head(&qp->list, &f->hash[hash]);
208 list_add_tail(&qp->lru_list, &f->lru_list); 221 list_add_tail(&qp->lru_list, &nf->lru_list);
209 f->nqueues++; 222 nf->nqueues++;
210 write_unlock(&f->lock); 223 write_unlock(&f->lock);
211 return qp; 224 return qp;
212} 225}
213 226
214static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) 227static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
228 struct inet_frags *f, void *arg)
215{ 229{
216 struct inet_frag_queue *q; 230 struct inet_frag_queue *q;
217 231
@@ -220,35 +234,36 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
220 return NULL; 234 return NULL;
221 235
222 f->constructor(q, arg); 236 f->constructor(q, arg);
223 atomic_add(f->qsize, &f->mem); 237 atomic_add(f->qsize, &nf->mem);
224 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 238 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
225 spin_lock_init(&q->lock); 239 spin_lock_init(&q->lock);
226 atomic_set(&q->refcnt, 1); 240 atomic_set(&q->refcnt, 1);
241 q->net = nf;
227 242
228 return q; 243 return q;
229} 244}
230 245
231static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, 246static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
232 void *arg, unsigned int hash) 247 struct inet_frags *f, void *arg, unsigned int hash)
233{ 248{
234 struct inet_frag_queue *q; 249 struct inet_frag_queue *q;
235 250
236 q = inet_frag_alloc(f, arg); 251 q = inet_frag_alloc(nf, f, arg);
237 if (q == NULL) 252 if (q == NULL)
238 return NULL; 253 return NULL;
239 254
240 return inet_frag_intern(q, f, hash, arg); 255 return inet_frag_intern(nf, q, f, hash, arg);
241} 256}
242 257
243struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, 258struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
244 unsigned int hash) 259 struct inet_frags *f, void *key, unsigned int hash)
245{ 260{
246 struct inet_frag_queue *q; 261 struct inet_frag_queue *q;
247 struct hlist_node *n; 262 struct hlist_node *n;
248 263
249 read_lock(&f->lock); 264 read_lock(&f->lock);
250 hlist_for_each_entry(q, n, &f->hash[hash], list) { 265 hlist_for_each_entry(q, n, &f->hash[hash], list) {
251 if (f->match(q, key)) { 266 if (q->net == nf && f->match(q, key)) {
252 atomic_inc(&q->refcnt); 267 atomic_inc(&q->refcnt);
253 read_unlock(&f->lock); 268 read_unlock(&f->lock);
254 return q; 269 return q;
@@ -256,6 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
256 } 271 }
257 read_unlock(&f->lock); 272 read_unlock(&f->lock);
258 273
259 return inet_frag_create(f, key, hash); 274 return inet_frag_create(nf, f, key, hash);
260} 275}
261EXPORT_SYMBOL(inet_frag_find); 276EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 16eecc7046a3..48d45008f749 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,12 +28,14 @@
28 * The bindhash mutex for snum's hash chain must be held here. 28 * The bindhash mutex for snum's hash chain must be held here.
29 */ 29 */
30struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, 30struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
31 struct net *net,
31 struct inet_bind_hashbucket *head, 32 struct inet_bind_hashbucket *head,
32 const unsigned short snum) 33 const unsigned short snum)
33{ 34{
34 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 35 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
35 36
36 if (tb != NULL) { 37 if (tb != NULL) {
38 tb->ib_net = net;
37 tb->port = snum; 39 tb->port = snum;
38 tb->fastreuse = 0; 40 tb->fastreuse = 0;
39 INIT_HLIST_HEAD(&tb->owners); 41 INIT_HLIST_HEAD(&tb->owners);
@@ -96,6 +98,7 @@ EXPORT_SYMBOL(inet_put_port);
96 * exclusive lock release). It should be ifdefed really. 98 * exclusive lock release). It should be ifdefed really.
97 */ 99 */
98void inet_listen_wlock(struct inet_hashinfo *hashinfo) 100void inet_listen_wlock(struct inet_hashinfo *hashinfo)
101 __acquires(hashinfo->lhash_lock)
99{ 102{
100 write_lock(&hashinfo->lhash_lock); 103 write_lock(&hashinfo->lhash_lock);
101 104
@@ -124,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
124 * remote address for the connection. So always assume those are both 127 * remote address for the connection. So always assume those are both
125 * wildcarded during the search since they can never be otherwise. 128 * wildcarded during the search since they can never be otherwise.
126 */ 129 */
127static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, 130static struct sock *inet_lookup_listener_slow(struct net *net,
131 const struct hlist_head *head,
128 const __be32 daddr, 132 const __be32 daddr,
129 const unsigned short hnum, 133 const unsigned short hnum,
130 const int dif) 134 const int dif)
@@ -136,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
136 sk_for_each(sk, node, head) { 140 sk_for_each(sk, node, head) {
137 const struct inet_sock *inet = inet_sk(sk); 141 const struct inet_sock *inet = inet_sk(sk);
138 142
139 if (inet->num == hnum && !ipv6_only_sock(sk)) { 143 if (sk->sk_net == net && inet->num == hnum &&
144 !ipv6_only_sock(sk)) {
140 const __be32 rcv_saddr = inet->rcv_saddr; 145 const __be32 rcv_saddr = inet->rcv_saddr;
141 int score = sk->sk_family == PF_INET ? 1 : 0; 146 int score = sk->sk_family == PF_INET ? 1 : 0;
142 147
@@ -162,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
162} 167}
163 168
164/* Optimize the common listener case. */ 169/* Optimize the common listener case. */
165struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, 170struct sock *__inet_lookup_listener(struct net *net,
171 struct inet_hashinfo *hashinfo,
166 const __be32 daddr, const unsigned short hnum, 172 const __be32 daddr, const unsigned short hnum,
167 const int dif) 173 const int dif)
168{ 174{
@@ -177,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
177 if (inet->num == hnum && !sk->sk_node.next && 183 if (inet->num == hnum && !sk->sk_node.next &&
178 (!inet->rcv_saddr || inet->rcv_saddr == daddr) && 184 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
179 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && 185 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
180 !sk->sk_bound_dev_if) 186 !sk->sk_bound_dev_if && sk->sk_net == net)
181 goto sherry_cache; 187 goto sherry_cache;
182 sk = inet_lookup_listener_slow(head, daddr, hnum, dif); 188 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
183 } 189 }
184 if (sk) { 190 if (sk) {
185sherry_cache: 191sherry_cache:
@@ -190,6 +196,47 @@ sherry_cache:
190} 196}
191EXPORT_SYMBOL_GPL(__inet_lookup_listener); 197EXPORT_SYMBOL_GPL(__inet_lookup_listener);
192 198
199struct sock * __inet_lookup_established(struct net *net,
200 struct inet_hashinfo *hashinfo,
201 const __be32 saddr, const __be16 sport,
202 const __be32 daddr, const u16 hnum,
203 const int dif)
204{
205 INET_ADDR_COOKIE(acookie, saddr, daddr)
206 const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
207 struct sock *sk;
208 const struct hlist_node *node;
209 /* Optimize here for direct hit, only listening connections can
210 * have wildcards anyways.
211 */
212 unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
213 struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
214 rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
215
216 prefetch(head->chain.first);
217 read_lock(lock);
218 sk_for_each(sk, node, &head->chain) {
219 if (INET_MATCH(sk, net, hash, acookie,
220 saddr, daddr, ports, dif))
221 goto hit; /* You sunk my battleship! */
222 }
223
224 /* Must check for a TIME_WAIT'er before going to listener hash. */
225 sk_for_each(sk, node, &head->twchain) {
226 if (INET_TW_MATCH(sk, net, hash, acookie,
227 saddr, daddr, ports, dif))
228 goto hit;
229 }
230 sk = NULL;
231out:
232 read_unlock(lock);
233 return sk;
234hit:
235 sock_hold(sk);
236 goto out;
237}
238EXPORT_SYMBOL_GPL(__inet_lookup_established);
239
193/* called with local bh disabled */ 240/* called with local bh disabled */
194static int __inet_check_established(struct inet_timewait_death_row *death_row, 241static int __inet_check_established(struct inet_timewait_death_row *death_row,
195 struct sock *sk, __u16 lport, 242 struct sock *sk, __u16 lport,
@@ -204,18 +251,21 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
204 const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); 251 const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
205 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); 252 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
206 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 253 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
254 rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
207 struct sock *sk2; 255 struct sock *sk2;
208 const struct hlist_node *node; 256 const struct hlist_node *node;
209 struct inet_timewait_sock *tw; 257 struct inet_timewait_sock *tw;
258 struct net *net = sk->sk_net;
210 259
211 prefetch(head->chain.first); 260 prefetch(head->chain.first);
212 write_lock(&head->lock); 261 write_lock(lock);
213 262
214 /* Check TIME-WAIT sockets first. */ 263 /* Check TIME-WAIT sockets first. */
215 sk_for_each(sk2, node, &head->twchain) { 264 sk_for_each(sk2, node, &head->twchain) {
216 tw = inet_twsk(sk2); 265 tw = inet_twsk(sk2);
217 266
218 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { 267 if (INET_TW_MATCH(sk2, net, hash, acookie,
268 saddr, daddr, ports, dif)) {
219 if (twsk_unique(sk, sk2, twp)) 269 if (twsk_unique(sk, sk2, twp))
220 goto unique; 270 goto unique;
221 else 271 else
@@ -226,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
226 276
227 /* And established part... */ 277 /* And established part... */
228 sk_for_each(sk2, node, &head->chain) { 278 sk_for_each(sk2, node, &head->chain) {
229 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) 279 if (INET_MATCH(sk2, net, hash, acookie,
280 saddr, daddr, ports, dif))
230 goto not_unique; 281 goto not_unique;
231 } 282 }
232 283
@@ -238,8 +289,8 @@ unique:
238 sk->sk_hash = hash; 289 sk->sk_hash = hash;
239 BUG_TRAP(sk_unhashed(sk)); 290 BUG_TRAP(sk_unhashed(sk));
240 __sk_add_node(sk, &head->chain); 291 __sk_add_node(sk, &head->chain);
241 sock_prot_inc_use(sk->sk_prot); 292 sock_prot_inuse_add(sk->sk_prot, 1);
242 write_unlock(&head->lock); 293 write_unlock(lock);
243 294
244 if (twp) { 295 if (twp) {
245 *twp = tw; 296 *twp = tw;
@@ -255,7 +306,7 @@ unique:
255 return 0; 306 return 0;
256 307
257not_unique: 308not_unique:
258 write_unlock(&head->lock); 309 write_unlock(lock);
259 return -EADDRNOTAVAIL; 310 return -EADDRNOTAVAIL;
260} 311}
261 312
@@ -266,17 +317,60 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
266 inet->dport); 317 inet->dport);
267} 318}
268 319
269/* 320void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk)
270 * Bind a port for a connect operation and hash it. 321{
271 */ 322 struct hlist_head *list;
272int inet_hash_connect(struct inet_timewait_death_row *death_row, 323 rwlock_t *lock;
273 struct sock *sk) 324 struct inet_ehash_bucket *head;
325
326 BUG_TRAP(sk_unhashed(sk));
327
328 sk->sk_hash = inet_sk_ehashfn(sk);
329 head = inet_ehash_bucket(hashinfo, sk->sk_hash);
330 list = &head->chain;
331 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
332
333 write_lock(lock);
334 __sk_add_node(sk, list);
335 sock_prot_inuse_add(sk->sk_prot, 1);
336 write_unlock(lock);
337}
338EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
339
340void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
341{
342 struct hlist_head *list;
343 rwlock_t *lock;
344
345 if (sk->sk_state != TCP_LISTEN) {
346 __inet_hash_nolisten(hashinfo, sk);
347 return;
348 }
349
350 BUG_TRAP(sk_unhashed(sk));
351 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
352 lock = &hashinfo->lhash_lock;
353
354 inet_listen_wlock(hashinfo);
355 __sk_add_node(sk, list);
356 sock_prot_inuse_add(sk->sk_prot, 1);
357 write_unlock(lock);
358 wake_up(&hashinfo->lhash_wait);
359}
360EXPORT_SYMBOL_GPL(__inet_hash);
361
362int __inet_hash_connect(struct inet_timewait_death_row *death_row,
363 struct sock *sk,
364 int (*check_established)(struct inet_timewait_death_row *,
365 struct sock *, __u16, struct inet_timewait_sock **),
366 void (*hash)(struct inet_hashinfo *, struct sock *))
274{ 367{
275 struct inet_hashinfo *hinfo = death_row->hashinfo; 368 struct inet_hashinfo *hinfo = death_row->hashinfo;
276 const unsigned short snum = inet_sk(sk)->num; 369 const unsigned short snum = inet_sk(sk)->num;
277 struct inet_bind_hashbucket *head; 370 struct inet_bind_hashbucket *head;
278 struct inet_bind_bucket *tb; 371 struct inet_bind_bucket *tb;
279 int ret; 372 int ret;
373 struct net *net = sk->sk_net;
280 374
281 if (!snum) { 375 if (!snum) {
282 int i, remaining, low, high, port; 376 int i, remaining, low, high, port;
@@ -299,19 +393,19 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
299 * unique enough. 393 * unique enough.
300 */ 394 */
301 inet_bind_bucket_for_each(tb, node, &head->chain) { 395 inet_bind_bucket_for_each(tb, node, &head->chain) {
302 if (tb->port == port) { 396 if (tb->ib_net == net && tb->port == port) {
303 BUG_TRAP(!hlist_empty(&tb->owners)); 397 BUG_TRAP(!hlist_empty(&tb->owners));
304 if (tb->fastreuse >= 0) 398 if (tb->fastreuse >= 0)
305 goto next_port; 399 goto next_port;
306 if (!__inet_check_established(death_row, 400 if (!check_established(death_row, sk,
307 sk, port, 401 port, &tw))
308 &tw))
309 goto ok; 402 goto ok;
310 goto next_port; 403 goto next_port;
311 } 404 }
312 } 405 }
313 406
314 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); 407 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
408 net, head, port);
315 if (!tb) { 409 if (!tb) {
316 spin_unlock(&head->lock); 410 spin_unlock(&head->lock);
317 break; 411 break;
@@ -333,7 +427,7 @@ ok:
333 inet_bind_hash(sk, tb, port); 427 inet_bind_hash(sk, tb, port);
334 if (sk_unhashed(sk)) { 428 if (sk_unhashed(sk)) {
335 inet_sk(sk)->sport = htons(port); 429 inet_sk(sk)->sport = htons(port);
336 __inet_hash(hinfo, sk, 0); 430 hash(hinfo, sk);
337 } 431 }
338 spin_unlock(&head->lock); 432 spin_unlock(&head->lock);
339 433
@@ -350,17 +444,28 @@ ok:
350 tb = inet_csk(sk)->icsk_bind_hash; 444 tb = inet_csk(sk)->icsk_bind_hash;
351 spin_lock_bh(&head->lock); 445 spin_lock_bh(&head->lock);
352 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 446 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
353 __inet_hash(hinfo, sk, 0); 447 hash(hinfo, sk);
354 spin_unlock_bh(&head->lock); 448 spin_unlock_bh(&head->lock);
355 return 0; 449 return 0;
356 } else { 450 } else {
357 spin_unlock(&head->lock); 451 spin_unlock(&head->lock);
358 /* No definite answer... Walk to established hash table */ 452 /* No definite answer... Walk to established hash table */
359 ret = __inet_check_established(death_row, sk, snum, NULL); 453 ret = check_established(death_row, sk, snum, NULL);
360out: 454out:
361 local_bh_enable(); 455 local_bh_enable();
362 return ret; 456 return ret;
363 } 457 }
364} 458}
459EXPORT_SYMBOL_GPL(__inet_hash_connect);
460
461/*
462 * Bind a port for a connect operation and hash it.
463 */
464int inet_hash_connect(struct inet_timewait_death_row *death_row,
465 struct sock *sk)
466{
467 return __inet_hash_connect(death_row, sk,
468 __inet_check_established, __inet_hash_nolisten);
469}
365 470
366EXPORT_SYMBOL_GPL(inet_hash_connect); 471EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index ac3b1d3dba2e..4a4d49fca1f2 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -310,7 +310,7 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
310 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; 310 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
311 311
312 if (lro_desc->vgrp) { 312 if (lro_desc->vgrp) {
313 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 313 if (lro_mgr->features & LRO_F_NAPI)
314 vlan_hwaccel_receive_skb(lro_desc->parent, 314 vlan_hwaccel_receive_skb(lro_desc->parent,
315 lro_desc->vgrp, 315 lro_desc->vgrp,
316 lro_desc->vlan_tag); 316 lro_desc->vlan_tag);
@@ -320,7 +320,7 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
320 lro_desc->vlan_tag); 320 lro_desc->vlan_tag);
321 321
322 } else { 322 } else {
323 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 323 if (lro_mgr->features & LRO_F_NAPI)
324 netif_receive_skb(lro_desc->parent); 324 netif_receive_skb(lro_desc->parent);
325 else 325 else
326 netif_rx(lro_desc->parent); 326 netif_rx(lro_desc->parent);
@@ -352,7 +352,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
352 goto out; 352 goto out;
353 353
354 if ((skb->protocol == htons(ETH_P_8021Q)) 354 if ((skb->protocol == htons(ETH_P_8021Q))
355 && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) 355 && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
356 vlan_hdr_len = VLAN_HLEN; 356 vlan_hdr_len = VLAN_HLEN;
357 357
358 if (!lro_desc->active) { /* start new lro session */ 358 if (!lro_desc->active) { /* start new lro session */
@@ -401,10 +401,11 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
401 int data_len = len; 401 int data_len = len;
402 int hdr_len = min(len, hlen); 402 int hdr_len = min(len, hlen);
403 403
404 skb = netdev_alloc_skb(lro_mgr->dev, hlen); 404 skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
405 if (!skb) 405 if (!skb)
406 return NULL; 406 return NULL;
407 407
408 skb_reserve(skb, lro_mgr->frag_align_pad);
408 skb->len = len; 409 skb->len = len;
409 skb->data_len = len - hdr_len; 410 skb->data_len = len - hdr_len;
410 skb->truesize += true_size; 411 skb->truesize += true_size;
@@ -473,7 +474,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
473 goto out; 474 goto out;
474 475
475 if ((skb->protocol == htons(ETH_P_8021Q)) 476 if ((skb->protocol == htons(ETH_P_8021Q))
476 && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) 477 && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
477 vlan_hdr_len = VLAN_HLEN; 478 vlan_hdr_len = VLAN_HLEN;
478 479
479 iph = (void *)(skb->data + vlan_hdr_len); 480 iph = (void *)(skb->data + vlan_hdr_len);
@@ -515,7 +516,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
515 void *priv) 516 void *priv)
516{ 517{
517 if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { 518 if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
518 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 519 if (lro_mgr->features & LRO_F_NAPI)
519 netif_receive_skb(skb); 520 netif_receive_skb(skb);
520 else 521 else
521 netif_rx(skb); 522 netif_rx(skb);
@@ -530,7 +531,7 @@ void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
530 void *priv) 531 void *priv)
531{ 532{
532 if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { 533 if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
533 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 534 if (lro_mgr->features & LRO_F_NAPI)
534 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); 535 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
535 else 536 else
536 vlan_hwaccel_rx(skb, vgrp, vlan_tag); 537 vlan_hwaccel_rx(skb, vgrp, vlan_tag);
@@ -549,7 +550,7 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
549 if (!skb) 550 if (!skb)
550 return; 551 return;
551 552
552 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 553 if (lro_mgr->features & LRO_F_NAPI)
553 netif_receive_skb(skb); 554 netif_receive_skb(skb);
554 else 555 else
555 netif_rx(skb); 556 netif_rx(skb);
@@ -569,7 +570,7 @@ void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
569 if (!skb) 570 if (!skb)
570 return; 571 return;
571 572
572 if (test_bit(LRO_F_NAPI, &lro_mgr->features)) 573 if (lro_mgr->features & LRO_F_NAPI)
573 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); 574 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
574 else 575 else
575 vlan_hwaccel_rx(skb, vgrp, vlan_tag); 576 vlan_hwaccel_rx(skb, vgrp, vlan_tag);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 4e189e28f306..876169f3a528 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
20 struct inet_bind_hashbucket *bhead; 20 struct inet_bind_hashbucket *bhead;
21 struct inet_bind_bucket *tb; 21 struct inet_bind_bucket *tb;
22 /* Unlink from established hashes. */ 22 /* Unlink from established hashes. */
23 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); 23 rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
24 24
25 write_lock(&ehead->lock); 25 write_lock(lock);
26 if (hlist_unhashed(&tw->tw_node)) { 26 if (hlist_unhashed(&tw->tw_node)) {
27 write_unlock(&ehead->lock); 27 write_unlock(lock);
28 return; 28 return;
29 } 29 }
30 __hlist_del(&tw->tw_node); 30 __hlist_del(&tw->tw_node);
31 sk_node_init(&tw->tw_node); 31 sk_node_init(&tw->tw_node);
32 write_unlock(&ehead->lock); 32 write_unlock(lock);
33 33
34 /* Disassociate with bind bucket. */ 34 /* Disassociate with bind bucket. */
35 bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; 35 bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
@@ -48,6 +48,21 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
48 inet_twsk_put(tw); 48 inet_twsk_put(tw);
49} 49}
50 50
51void inet_twsk_put(struct inet_timewait_sock *tw)
52{
53 if (atomic_dec_and_test(&tw->tw_refcnt)) {
54 struct module *owner = tw->tw_prot->owner;
55 twsk_destructor((struct sock *)tw);
56#ifdef SOCK_REFCNT_DEBUG
57 printk(KERN_DEBUG "%s timewait_sock %p released\n",
58 tw->tw_prot->name, tw);
59#endif
60 kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
61 module_put(owner);
62 }
63}
64EXPORT_SYMBOL_GPL(inet_twsk_put);
65
51/* 66/*
52 * Enter the time wait state. This is called with locally disabled BH. 67 * Enter the time wait state. This is called with locally disabled BH.
53 * Essentially we whip up a timewait bucket, copy the relevant info into it 68 * Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -59,6 +74,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
59 const struct inet_sock *inet = inet_sk(sk); 74 const struct inet_sock *inet = inet_sk(sk);
60 const struct inet_connection_sock *icsk = inet_csk(sk); 75 const struct inet_connection_sock *icsk = inet_csk(sk);
61 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); 76 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
77 rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
62 struct inet_bind_hashbucket *bhead; 78 struct inet_bind_hashbucket *bhead;
63 /* Step 1: Put TW into bind hash. Original socket stays there too. 79 /* Step 1: Put TW into bind hash. Original socket stays there too.
64 Note, that any socket with inet->num != 0 MUST be bound in 80 Note, that any socket with inet->num != 0 MUST be bound in
@@ -71,17 +87,17 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
71 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 87 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
72 spin_unlock(&bhead->lock); 88 spin_unlock(&bhead->lock);
73 89
74 write_lock(&ehead->lock); 90 write_lock(lock);
75 91
76 /* Step 2: Remove SK from established hash. */ 92 /* Step 2: Remove SK from established hash. */
77 if (__sk_del_node_init(sk)) 93 if (__sk_del_node_init(sk))
78 sock_prot_dec_use(sk->sk_prot); 94 sock_prot_inuse_add(sk->sk_prot, -1);
79 95
80 /* Step 3: Hash TW into TIMEWAIT chain. */ 96 /* Step 3: Hash TW into TIMEWAIT chain. */
81 inet_twsk_add_node(tw, &ehead->twchain); 97 inet_twsk_add_node(tw, &ehead->twchain);
82 atomic_inc(&tw->tw_refcnt); 98 atomic_inc(&tw->tw_refcnt);
83 99
84 write_unlock(&ehead->lock); 100 write_unlock(lock);
85} 101}
86 102
87EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 103EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -193,16 +209,14 @@ out:
193 209
194EXPORT_SYMBOL_GPL(inet_twdr_hangman); 210EXPORT_SYMBOL_GPL(inet_twdr_hangman);
195 211
196extern void twkill_slots_invalid(void);
197
198void inet_twdr_twkill_work(struct work_struct *work) 212void inet_twdr_twkill_work(struct work_struct *work)
199{ 213{
200 struct inet_timewait_death_row *twdr = 214 struct inet_timewait_death_row *twdr =
201 container_of(work, struct inet_timewait_death_row, twkill_work); 215 container_of(work, struct inet_timewait_death_row, twkill_work);
202 int i; 216 int i;
203 217
204 if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) 218 BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
205 twkill_slots_invalid(); 219 (sizeof(twdr->thread_slots) * 8));
206 220
207 while (twdr->thread_slots) { 221 while (twdr->thread_slots) {
208 spin_lock_bh(&twdr->death_lock); 222 spin_lock_bh(&twdr->death_lock);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 771031dfbd0f..af995198f643 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -61,7 +61,7 @@
61 * 4. Global variable peer_total is modified under the pool lock. 61 * 4. Global variable peer_total is modified under the pool lock.
62 * 5. struct inet_peer fields modification: 62 * 5. struct inet_peer fields modification:
63 * avl_left, avl_right, avl_parent, avl_height: pool lock 63 * avl_left, avl_right, avl_parent, avl_height: pool lock
64 * unused_next, unused_prevp: unused node list lock 64 * unused: unused node list lock
65 * refcnt: atomically against modifications on other CPU; 65 * refcnt: atomically against modifications on other CPU;
66 * usually under some other lock to prevent node disappearing 66 * usually under some other lock to prevent node disappearing
67 * dtime: unused node list lock 67 * dtime: unused node list lock
@@ -94,8 +94,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
94int inet_peer_gc_mintime __read_mostly = 10 * HZ; 94int inet_peer_gc_mintime __read_mostly = 10 * HZ;
95int inet_peer_gc_maxtime __read_mostly = 120 * HZ; 95int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
96 96
97static struct inet_peer *inet_peer_unused_head; 97static LIST_HEAD(unused_peers);
98static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
99static DEFINE_SPINLOCK(inet_peer_unused_lock); 98static DEFINE_SPINLOCK(inet_peer_unused_lock);
100 99
101static void peer_check_expire(unsigned long dummy); 100static void peer_check_expire(unsigned long dummy);
@@ -138,15 +137,7 @@ void __init inet_initpeers(void)
138static void unlink_from_unused(struct inet_peer *p) 137static void unlink_from_unused(struct inet_peer *p)
139{ 138{
140 spin_lock_bh(&inet_peer_unused_lock); 139 spin_lock_bh(&inet_peer_unused_lock);
141 if (p->unused_prevp != NULL) { 140 list_del_init(&p->unused);
142 /* On unused list. */
143 *p->unused_prevp = p->unused_next;
144 if (p->unused_next != NULL)
145 p->unused_next->unused_prevp = p->unused_prevp;
146 else
147 inet_peer_unused_tailp = p->unused_prevp;
148 p->unused_prevp = NULL; /* mark it as removed */
149 }
150 spin_unlock_bh(&inet_peer_unused_lock); 141 spin_unlock_bh(&inet_peer_unused_lock);
151} 142}
152 143
@@ -337,24 +328,24 @@ static void unlink_from_pool(struct inet_peer *p)
337/* May be called with local BH enabled. */ 328/* May be called with local BH enabled. */
338static int cleanup_once(unsigned long ttl) 329static int cleanup_once(unsigned long ttl)
339{ 330{
340 struct inet_peer *p; 331 struct inet_peer *p = NULL;
341 332
342 /* Remove the first entry from the list of unused nodes. */ 333 /* Remove the first entry from the list of unused nodes. */
343 spin_lock_bh(&inet_peer_unused_lock); 334 spin_lock_bh(&inet_peer_unused_lock);
344 p = inet_peer_unused_head; 335 if (!list_empty(&unused_peers)) {
345 if (p != NULL) { 336 __u32 delta;
346 __u32 delta = (__u32)jiffies - p->dtime; 337
338 p = list_first_entry(&unused_peers, struct inet_peer, unused);
339 delta = (__u32)jiffies - p->dtime;
340
347 if (delta < ttl) { 341 if (delta < ttl) {
348 /* Do not prune fresh entries. */ 342 /* Do not prune fresh entries. */
349 spin_unlock_bh(&inet_peer_unused_lock); 343 spin_unlock_bh(&inet_peer_unused_lock);
350 return -1; 344 return -1;
351 } 345 }
352 inet_peer_unused_head = p->unused_next; 346
353 if (p->unused_next != NULL) 347 list_del_init(&p->unused);
354 p->unused_next->unused_prevp = p->unused_prevp; 348
355 else
356 inet_peer_unused_tailp = p->unused_prevp;
357 p->unused_prevp = NULL; /* mark as not on the list */
358 /* Grab an extra reference to prevent node disappearing 349 /* Grab an extra reference to prevent node disappearing
359 * before unlink_from_pool() call. */ 350 * before unlink_from_pool() call. */
360 atomic_inc(&p->refcnt); 351 atomic_inc(&p->refcnt);
@@ -412,7 +403,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
412 403
413 /* Link the node. */ 404 /* Link the node. */
414 link_to_pool(n); 405 link_to_pool(n);
415 n->unused_prevp = NULL; /* not on the list */ 406 INIT_LIST_HEAD(&n->unused);
416 peer_total++; 407 peer_total++;
417 write_unlock_bh(&peer_pool_lock); 408 write_unlock_bh(&peer_pool_lock);
418 409
@@ -467,10 +458,7 @@ void inet_putpeer(struct inet_peer *p)
467{ 458{
468 spin_lock_bh(&inet_peer_unused_lock); 459 spin_lock_bh(&inet_peer_unused_lock);
469 if (atomic_dec_and_test(&p->refcnt)) { 460 if (atomic_dec_and_test(&p->refcnt)) {
470 p->unused_prevp = inet_peer_unused_tailp; 461 list_add_tail(&p->unused, &unused_peers);
471 p->unused_next = NULL;
472 *inet_peer_unused_tailp = p;
473 inet_peer_unused_tailp = &p->unused_next;
474 p->dtime = (__u32)jiffies; 462 p->dtime = (__u32)jiffies;
475 } 463 }
476 spin_unlock_bh(&inet_peer_unused_lock); 464 spin_unlock_bh(&inet_peer_unused_lock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 877da3ed52e2..0b3b328d82db 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -110,7 +110,7 @@ int ip_forward(struct sk_buff *skb)
110 110
111 skb->priority = rt_tos2priority(iph->tos); 111 skb->priority = rt_tos2priority(iph->tos);
112 112
113 return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev, 113 return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev,
114 ip_forward_finish); 114 ip_forward_finish);
115 115
116sr_failed: 116sr_failed:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2143bf30597a..a2e92f9709db 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,7 +50,7 @@
50 * as well. Or notify me, at least. --ANK 50 * as well. Or notify me, at least. --ANK
51 */ 51 */
52 52
53int sysctl_ipfrag_max_dist __read_mostly = 64; 53static int sysctl_ipfrag_max_dist __read_mostly = 64;
54 54
55struct ipfrag_skb_cb 55struct ipfrag_skb_cb
56{ 56{
@@ -74,35 +74,16 @@ struct ipq {
74 struct inet_peer *peer; 74 struct inet_peer *peer;
75}; 75};
76 76
77struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
78 /*
79 * Fragment cache limits. We will commit 256K at one time. Should we
80 * cross that limit we will prune down to 192K. This should cope with
81 * even the most extreme cases without allowing an attacker to
82 * measurably harm machine performance.
83 */
84 .high_thresh = 256 * 1024,
85 .low_thresh = 192 * 1024,
86
87 /*
88 * Important NOTE! Fragment queue must be destroyed before MSL expires.
89 * RFC791 is wrong proposing to prolongate timer each fragment arrival
90 * by TTL.
91 */
92 .timeout = IP_FRAG_TIME,
93 .secret_interval = 10 * 60 * HZ,
94};
95
96static struct inet_frags ip4_frags; 77static struct inet_frags ip4_frags;
97 78
98int ip_frag_nqueues(void) 79int ip_frag_nqueues(struct net *net)
99{ 80{
100 return ip4_frags.nqueues; 81 return net->ipv4.frags.nqueues;
101} 82}
102 83
103int ip_frag_mem(void) 84int ip_frag_mem(struct net *net)
104{ 85{
105 return atomic_read(&ip4_frags.mem); 86 return atomic_read(&net->ipv4.frags.mem);
106} 87}
107 88
108static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, 89static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -142,11 +123,12 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
142} 123}
143 124
144/* Memory Tracking Functions. */ 125/* Memory Tracking Functions. */
145static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) 126static __inline__ void frag_kfree_skb(struct netns_frags *nf,
127 struct sk_buff *skb, int *work)
146{ 128{
147 if (work) 129 if (work)
148 *work -= skb->truesize; 130 *work -= skb->truesize;
149 atomic_sub(skb->truesize, &ip4_frags.mem); 131 atomic_sub(skb->truesize, &nf->mem);
150 kfree_skb(skb); 132 kfree_skb(skb);
151} 133}
152 134
@@ -192,11 +174,11 @@ static void ipq_kill(struct ipq *ipq)
192/* Memory limiting on fragments. Evictor trashes the oldest 174/* Memory limiting on fragments. Evictor trashes the oldest
193 * fragment queue until we are back under the threshold. 175 * fragment queue until we are back under the threshold.
194 */ 176 */
195static void ip_evictor(void) 177static void ip_evictor(struct net *net)
196{ 178{
197 int evicted; 179 int evicted;
198 180
199 evicted = inet_frag_evictor(&ip4_frags); 181 evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
200 if (evicted) 182 if (evicted)
201 IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); 183 IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
202} 184}
@@ -236,7 +218,7 @@ out:
236/* Find the correct entry in the "incomplete datagrams" queue for 218/* Find the correct entry in the "incomplete datagrams" queue for
237 * this IP datagram, and create new one, if nothing is found. 219 * this IP datagram, and create new one, if nothing is found.
238 */ 220 */
239static inline struct ipq *ip_find(struct iphdr *iph, u32 user) 221static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
240{ 222{
241 struct inet_frag_queue *q; 223 struct inet_frag_queue *q;
242 struct ip4_create_arg arg; 224 struct ip4_create_arg arg;
@@ -246,7 +228,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
246 arg.user = user; 228 arg.user = user;
247 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 229 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
248 230
249 q = inet_frag_find(&ip4_frags, &arg, hash); 231 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
250 if (q == NULL) 232 if (q == NULL)
251 goto out_nomem; 233 goto out_nomem;
252 234
@@ -286,7 +268,7 @@ static int ip_frag_reinit(struct ipq *qp)
286{ 268{
287 struct sk_buff *fp; 269 struct sk_buff *fp;
288 270
289 if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) { 271 if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
290 atomic_inc(&qp->q.refcnt); 272 atomic_inc(&qp->q.refcnt);
291 return -ETIMEDOUT; 273 return -ETIMEDOUT;
292 } 274 }
@@ -294,7 +276,7 @@ static int ip_frag_reinit(struct ipq *qp)
294 fp = qp->q.fragments; 276 fp = qp->q.fragments;
295 do { 277 do {
296 struct sk_buff *xp = fp->next; 278 struct sk_buff *xp = fp->next;
297 frag_kfree_skb(fp, NULL); 279 frag_kfree_skb(qp->q.net, fp, NULL);
298 fp = xp; 280 fp = xp;
299 } while (fp); 281 } while (fp);
300 282
@@ -431,7 +413,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
431 qp->q.fragments = next; 413 qp->q.fragments = next;
432 414
433 qp->q.meat -= free_it->len; 415 qp->q.meat -= free_it->len;
434 frag_kfree_skb(free_it, NULL); 416 frag_kfree_skb(qp->q.net, free_it, NULL);
435 } 417 }
436 } 418 }
437 419
@@ -451,7 +433,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
451 } 433 }
452 qp->q.stamp = skb->tstamp; 434 qp->q.stamp = skb->tstamp;
453 qp->q.meat += skb->len; 435 qp->q.meat += skb->len;
454 atomic_add(skb->truesize, &ip4_frags.mem); 436 atomic_add(skb->truesize, &qp->q.net->mem);
455 if (offset == 0) 437 if (offset == 0)
456 qp->q.last_in |= FIRST_IN; 438 qp->q.last_in |= FIRST_IN;
457 439
@@ -459,7 +441,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
459 return ip_frag_reasm(qp, prev, dev); 441 return ip_frag_reasm(qp, prev, dev);
460 442
461 write_lock(&ip4_frags.lock); 443 write_lock(&ip4_frags.lock);
462 list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list); 444 list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
463 write_unlock(&ip4_frags.lock); 445 write_unlock(&ip4_frags.lock);
464 return -EINPROGRESS; 446 return -EINPROGRESS;
465 447
@@ -534,12 +516,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
534 head->len -= clone->len; 516 head->len -= clone->len;
535 clone->csum = 0; 517 clone->csum = 0;
536 clone->ip_summed = head->ip_summed; 518 clone->ip_summed = head->ip_summed;
537 atomic_add(clone->truesize, &ip4_frags.mem); 519 atomic_add(clone->truesize, &qp->q.net->mem);
538 } 520 }
539 521
540 skb_shinfo(head)->frag_list = head->next; 522 skb_shinfo(head)->frag_list = head->next;
541 skb_push(head, head->data - skb_network_header(head)); 523 skb_push(head, head->data - skb_network_header(head));
542 atomic_sub(head->truesize, &ip4_frags.mem); 524 atomic_sub(head->truesize, &qp->q.net->mem);
543 525
544 for (fp=head->next; fp; fp = fp->next) { 526 for (fp=head->next; fp; fp = fp->next) {
545 head->data_len += fp->len; 527 head->data_len += fp->len;
@@ -549,7 +531,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
549 else if (head->ip_summed == CHECKSUM_COMPLETE) 531 else if (head->ip_summed == CHECKSUM_COMPLETE)
550 head->csum = csum_add(head->csum, fp->csum); 532 head->csum = csum_add(head->csum, fp->csum);
551 head->truesize += fp->truesize; 533 head->truesize += fp->truesize;
552 atomic_sub(fp->truesize, &ip4_frags.mem); 534 atomic_sub(fp->truesize, &qp->q.net->mem);
553 } 535 }
554 536
555 head->next = NULL; 537 head->next = NULL;
@@ -582,15 +564,17 @@ out_fail:
582int ip_defrag(struct sk_buff *skb, u32 user) 564int ip_defrag(struct sk_buff *skb, u32 user)
583{ 565{
584 struct ipq *qp; 566 struct ipq *qp;
567 struct net *net;
585 568
586 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); 569 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
587 570
571 net = skb->dev->nd_net;
588 /* Start by cleaning up the memory. */ 572 /* Start by cleaning up the memory. */
589 if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) 573 if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
590 ip_evictor(); 574 ip_evictor(net);
591 575
592 /* Lookup (or create) queue header */ 576 /* Lookup (or create) queue header */
593 if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { 577 if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
594 int ret; 578 int ret;
595 579
596 spin_lock(&qp->q.lock); 580 spin_lock(&qp->q.lock);
@@ -607,9 +591,142 @@ int ip_defrag(struct sk_buff *skb, u32 user)
607 return -ENOMEM; 591 return -ENOMEM;
608} 592}
609 593
594#ifdef CONFIG_SYSCTL
595static int zero;
596
597static struct ctl_table ip4_frags_ctl_table[] = {
598 {
599 .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
600 .procname = "ipfrag_high_thresh",
601 .data = &init_net.ipv4.frags.high_thresh,
602 .maxlen = sizeof(int),
603 .mode = 0644,
604 .proc_handler = &proc_dointvec
605 },
606 {
607 .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
608 .procname = "ipfrag_low_thresh",
609 .data = &init_net.ipv4.frags.low_thresh,
610 .maxlen = sizeof(int),
611 .mode = 0644,
612 .proc_handler = &proc_dointvec
613 },
614 {
615 .ctl_name = NET_IPV4_IPFRAG_TIME,
616 .procname = "ipfrag_time",
617 .data = &init_net.ipv4.frags.timeout,
618 .maxlen = sizeof(int),
619 .mode = 0644,
620 .proc_handler = &proc_dointvec_jiffies,
621 .strategy = &sysctl_jiffies
622 },
623 {
624 .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
625 .procname = "ipfrag_secret_interval",
626 .data = &ip4_frags.secret_interval,
627 .maxlen = sizeof(int),
628 .mode = 0644,
629 .proc_handler = &proc_dointvec_jiffies,
630 .strategy = &sysctl_jiffies
631 },
632 {
633 .procname = "ipfrag_max_dist",
634 .data = &sysctl_ipfrag_max_dist,
635 .maxlen = sizeof(int),
636 .mode = 0644,
637 .proc_handler = &proc_dointvec_minmax,
638 .extra1 = &zero
639 },
640 { }
641};
642
643static int ip4_frags_ctl_register(struct net *net)
644{
645 struct ctl_table *table;
646 struct ctl_table_header *hdr;
647
648 table = ip4_frags_ctl_table;
649 if (net != &init_net) {
650 table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
651 if (table == NULL)
652 goto err_alloc;
653
654 table[0].data = &net->ipv4.frags.high_thresh;
655 table[1].data = &net->ipv4.frags.low_thresh;
656 table[2].data = &net->ipv4.frags.timeout;
657 table[3].mode &= ~0222;
658 table[4].mode &= ~0222;
659 }
660
661 hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
662 if (hdr == NULL)
663 goto err_reg;
664
665 net->ipv4.frags_hdr = hdr;
666 return 0;
667
668err_reg:
669 if (net != &init_net)
670 kfree(table);
671err_alloc:
672 return -ENOMEM;
673}
674
675static void ip4_frags_ctl_unregister(struct net *net)
676{
677 struct ctl_table *table;
678
679 table = net->ipv4.frags_hdr->ctl_table_arg;
680 unregister_net_sysctl_table(net->ipv4.frags_hdr);
681 kfree(table);
682}
683#else
684static inline int ip4_frags_ctl_register(struct net *net)
685{
686 return 0;
687}
688
689static inline void ip4_frags_ctl_unregister(struct net *net)
690{
691}
692#endif
693
694static int ipv4_frags_init_net(struct net *net)
695{
696 /*
697 * Fragment cache limits. We will commit 256K at one time. Should we
698 * cross that limit we will prune down to 192K. This should cope with
699 * even the most extreme cases without allowing an attacker to
700 * measurably harm machine performance.
701 */
702 net->ipv4.frags.high_thresh = 256 * 1024;
703 net->ipv4.frags.low_thresh = 192 * 1024;
704 /*
705 * Important NOTE! Fragment queue must be destroyed before MSL expires.
706 * RFC791 is wrong proposing to prolongate timer each fragment arrival
707 * by TTL.
708 */
709 net->ipv4.frags.timeout = IP_FRAG_TIME;
710
711 inet_frags_init_net(&net->ipv4.frags);
712
713 return ip4_frags_ctl_register(net);
714}
715
716static void ipv4_frags_exit_net(struct net *net)
717{
718 ip4_frags_ctl_unregister(net);
719 inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
720}
721
722static struct pernet_operations ip4_frags_ops = {
723 .init = ipv4_frags_init_net,
724 .exit = ipv4_frags_exit_net,
725};
726
610void __init ipfrag_init(void) 727void __init ipfrag_init(void)
611{ 728{
612 ip4_frags.ctl = &ip4_frags_ctl; 729 register_pernet_subsys(&ip4_frags_ops);
613 ip4_frags.hashfn = ip4_hashfn; 730 ip4_frags.hashfn = ip4_hashfn;
614 ip4_frags.constructor = ip4_frag_init; 731 ip4_frags.constructor = ip4_frag_init;
615 ip4_frags.destructor = ip4_frag_free; 732 ip4_frags.destructor = ip4_frag_free;
@@ -617,6 +734,7 @@ void __init ipfrag_init(void)
617 ip4_frags.qsize = sizeof(struct ipq); 734 ip4_frags.qsize = sizeof(struct ipq);
618 ip4_frags.match = ip4_frag_match; 735 ip4_frags.match = ip4_frag_match;
619 ip4_frags.frag_expire = ip_expire; 736 ip4_frags.frag_expire = ip_expire;
737 ip4_frags.secret_interval = 10 * 60 * HZ;
620 inet_frags_init(&ip4_frags); 738 inet_frags_init(&ip4_frags);
621} 739}
622 740
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 02b02a8d681c..63f691719353 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -176,7 +176,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
176 } 176 }
177 for (t = tunnels_l[h1]; t; t = t->next) { 177 for (t = tunnels_l[h1]; t; t = t->next) {
178 if (local == t->parms.iph.saddr || 178 if (local == t->parms.iph.saddr ||
179 (local == t->parms.iph.daddr && MULTICAST(local))) { 179 (local == t->parms.iph.daddr &&
180 ipv4_is_multicast(local))) {
180 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
181 return t; 182 return t;
182 } 183 }
@@ -201,7 +202,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
201 202
202 if (local) 203 if (local)
203 prio |= 1; 204 prio |= 1;
204 if (remote && !MULTICAST(remote)) { 205 if (remote && !ipv4_is_multicast(remote)) {
205 prio |= 2; 206 prio |= 2;
206 h ^= HASH(remote); 207 h ^= HASH(remote);
207 } 208 }
@@ -367,7 +368,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
367 368
368 read_lock(&ipgre_lock); 369 read_lock(&ipgre_lock);
369 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0); 370 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
370 if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) 371 if (t == NULL || t->parms.iph.daddr == 0 ||
372 ipv4_is_multicast(t->parms.iph.daddr))
371 goto out; 373 goto out;
372 374
373 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 375 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -478,7 +480,7 @@ out:
478 fl.fl4_dst = eiph->saddr; 480 fl.fl4_dst = eiph->saddr;
479 fl.fl4_tos = RT_TOS(eiph->tos); 481 fl.fl4_tos = RT_TOS(eiph->tos);
480 fl.proto = IPPROTO_GRE; 482 fl.proto = IPPROTO_GRE;
481 if (ip_route_output_key(&rt, &fl)) { 483 if (ip_route_output_key(&init_net, &rt, &fl)) {
482 kfree_skb(skb2); 484 kfree_skb(skb2);
483 return; 485 return;
484 } 486 }
@@ -491,7 +493,7 @@ out:
491 fl.fl4_dst = eiph->daddr; 493 fl.fl4_dst = eiph->daddr;
492 fl.fl4_src = eiph->saddr; 494 fl.fl4_src = eiph->saddr;
493 fl.fl4_tos = eiph->tos; 495 fl.fl4_tos = eiph->tos;
494 if (ip_route_output_key(&rt, &fl) || 496 if (ip_route_output_key(&init_net, &rt, &fl) ||
495 rt->u.dst.dev->type != ARPHRD_IPGRE) { 497 rt->u.dst.dev->type != ARPHRD_IPGRE) {
496 ip_rt_put(rt); 498 ip_rt_put(rt);
497 kfree_skb(skb2); 499 kfree_skb(skb2);
@@ -613,13 +615,13 @@ static int ipgre_rcv(struct sk_buff *skb)
613 offset += 4; 615 offset += 4;
614 } 616 }
615 617
616 skb_reset_mac_header(skb); 618 skb->mac_header = skb->network_header;
617 __pskb_pull(skb, offset); 619 __pskb_pull(skb, offset);
618 skb_reset_network_header(skb); 620 skb_reset_network_header(skb);
619 skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 621 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
620 skb->pkt_type = PACKET_HOST; 622 skb->pkt_type = PACKET_HOST;
621#ifdef CONFIG_NET_IPGRE_BROADCAST 623#ifdef CONFIG_NET_IPGRE_BROADCAST
622 if (MULTICAST(iph->daddr)) { 624 if (ipv4_is_multicast(iph->daddr)) {
623 /* Looped back packet, drop it! */ 625 /* Looped back packet, drop it! */
624 if (((struct rtable*)skb->dst)->fl.iif == 0) 626 if (((struct rtable*)skb->dst)->fl.iif == 0)
625 goto drop; 627 goto drop;
@@ -746,7 +748,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
746 .saddr = tiph->saddr, 748 .saddr = tiph->saddr,
747 .tos = RT_TOS(tos) } }, 749 .tos = RT_TOS(tos) } },
748 .proto = IPPROTO_GRE }; 750 .proto = IPPROTO_GRE };
749 if (ip_route_output_key(&rt, &fl)) { 751 if (ip_route_output_key(&init_net, &rt, &fl)) {
750 tunnel->stat.tx_carrier_errors++; 752 tunnel->stat.tx_carrier_errors++;
751 goto tx_error; 753 goto tx_error;
752 } 754 }
@@ -783,7 +785,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
783 struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 785 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
784 786
785 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 787 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
786 if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || 788 if ((tunnel->parms.iph.daddr &&
789 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
787 rt6->rt6i_dst.plen == 128) { 790 rt6->rt6i_dst.plen == 128) {
788 rt6->rt6i_flags |= RTF_MODIFIED; 791 rt6->rt6i_flags |= RTF_MODIFIED;
789 skb->dst->metrics[RTAX_MTU-1] = mtu; 792 skb->dst->metrics[RTAX_MTU-1] = mtu;
@@ -896,6 +899,59 @@ tx_error:
896 return 0; 899 return 0;
897} 900}
898 901
902static void ipgre_tunnel_bind_dev(struct net_device *dev)
903{
904 struct net_device *tdev = NULL;
905 struct ip_tunnel *tunnel;
906 struct iphdr *iph;
907 int hlen = LL_MAX_HEADER;
908 int mtu = ETH_DATA_LEN;
909 int addend = sizeof(struct iphdr) + 4;
910
911 tunnel = netdev_priv(dev);
912 iph = &tunnel->parms.iph;
913
914 /* Guess output device to choose reasonable mtu and hard_header_len */
915
916 if (iph->daddr) {
917 struct flowi fl = { .oif = tunnel->parms.link,
918 .nl_u = { .ip4_u =
919 { .daddr = iph->daddr,
920 .saddr = iph->saddr,
921 .tos = RT_TOS(iph->tos) } },
922 .proto = IPPROTO_GRE };
923 struct rtable *rt;
924 if (!ip_route_output_key(&init_net, &rt, &fl)) {
925 tdev = rt->u.dst.dev;
926 ip_rt_put(rt);
927 }
928 dev->flags |= IFF_POINTOPOINT;
929 }
930
931 if (!tdev && tunnel->parms.link)
932 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
933
934 if (tdev) {
935 hlen = tdev->hard_header_len;
936 mtu = tdev->mtu;
937 }
938 dev->iflink = tunnel->parms.link;
939
940 /* Precalculate GRE options length */
941 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942 if (tunnel->parms.o_flags&GRE_CSUM)
943 addend += 4;
944 if (tunnel->parms.o_flags&GRE_KEY)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_SEQ)
947 addend += 4;
948 }
949 dev->hard_header_len = hlen + addend;
950 dev->mtu = mtu - addend;
951 tunnel->hlen = addend;
952
953}
954
899static int 955static int
900ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 956ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
901{ 957{
@@ -956,7 +1012,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
956 1012
957 t = netdev_priv(dev); 1013 t = netdev_priv(dev);
958 1014
959 if (MULTICAST(p.iph.daddr)) 1015 if (ipv4_is_multicast(p.iph.daddr))
960 nflags = IFF_BROADCAST; 1016 nflags = IFF_BROADCAST;
961 else if (p.iph.daddr) 1017 else if (p.iph.daddr)
962 nflags = IFF_POINTOPOINT; 1018 nflags = IFF_POINTOPOINT;
@@ -983,6 +1039,11 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
983 t->parms.iph.ttl = p.iph.ttl; 1039 t->parms.iph.ttl = p.iph.ttl;
984 t->parms.iph.tos = p.iph.tos; 1040 t->parms.iph.tos = p.iph.tos;
985 t->parms.iph.frag_off = p.iph.frag_off; 1041 t->parms.iph.frag_off = p.iph.frag_off;
1042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
1044 ipgre_tunnel_bind_dev(dev);
1045 netdev_state_change(dev);
1046 }
986 } 1047 }
987 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
988 err = -EFAULT; 1049 err = -EFAULT;
@@ -1085,7 +1146,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1085 memcpy(&iph->daddr, daddr, 4); 1146 memcpy(&iph->daddr, daddr, 4);
1086 return t->hlen; 1147 return t->hlen;
1087 } 1148 }
1088 if (iph->daddr && !MULTICAST(iph->daddr)) 1149 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1089 return t->hlen; 1150 return t->hlen;
1090 1151
1091 return -t->hlen; 1152 return -t->hlen;
@@ -1108,7 +1169,7 @@ static int ipgre_open(struct net_device *dev)
1108{ 1169{
1109 struct ip_tunnel *t = netdev_priv(dev); 1170 struct ip_tunnel *t = netdev_priv(dev);
1110 1171
1111 if (MULTICAST(t->parms.iph.daddr)) { 1172 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1112 struct flowi fl = { .oif = t->parms.link, 1173 struct flowi fl = { .oif = t->parms.link,
1113 .nl_u = { .ip4_u = 1174 .nl_u = { .ip4_u =
1114 { .daddr = t->parms.iph.daddr, 1175 { .daddr = t->parms.iph.daddr,
@@ -1116,7 +1177,7 @@ static int ipgre_open(struct net_device *dev)
1116 .tos = RT_TOS(t->parms.iph.tos) } }, 1177 .tos = RT_TOS(t->parms.iph.tos) } },
1117 .proto = IPPROTO_GRE }; 1178 .proto = IPPROTO_GRE };
1118 struct rtable *rt; 1179 struct rtable *rt;
1119 if (ip_route_output_key(&rt, &fl)) 1180 if (ip_route_output_key(&init_net, &rt, &fl))
1120 return -EADDRNOTAVAIL; 1181 return -EADDRNOTAVAIL;
1121 dev = rt->u.dst.dev; 1182 dev = rt->u.dst.dev;
1122 ip_rt_put(rt); 1183 ip_rt_put(rt);
@@ -1131,8 +1192,9 @@ static int ipgre_open(struct net_device *dev)
1131static int ipgre_close(struct net_device *dev) 1192static int ipgre_close(struct net_device *dev)
1132{ 1193{
1133 struct ip_tunnel *t = netdev_priv(dev); 1194 struct ip_tunnel *t = netdev_priv(dev);
1134 if (MULTICAST(t->parms.iph.daddr) && t->mlink) { 1195 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1135 struct in_device *in_dev = inetdev_by_index(t->mlink); 1196 struct in_device *in_dev;
1197 in_dev = inetdev_by_index(dev->nd_net, t->mlink);
1136 if (in_dev) { 1198 if (in_dev) {
1137 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1199 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1138 in_dev_put(in_dev); 1200 in_dev_put(in_dev);
@@ -1162,12 +1224,8 @@ static void ipgre_tunnel_setup(struct net_device *dev)
1162 1224
1163static int ipgre_tunnel_init(struct net_device *dev) 1225static int ipgre_tunnel_init(struct net_device *dev)
1164{ 1226{
1165 struct net_device *tdev = NULL;
1166 struct ip_tunnel *tunnel; 1227 struct ip_tunnel *tunnel;
1167 struct iphdr *iph; 1228 struct iphdr *iph;
1168 int hlen = LL_MAX_HEADER;
1169 int mtu = ETH_DATA_LEN;
1170 int addend = sizeof(struct iphdr) + 4;
1171 1229
1172 tunnel = netdev_priv(dev); 1230 tunnel = netdev_priv(dev);
1173 iph = &tunnel->parms.iph; 1231 iph = &tunnel->parms.iph;
@@ -1178,25 +1236,11 @@ static int ipgre_tunnel_init(struct net_device *dev)
1178 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 1236 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1179 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1237 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1180 1238
1181 /* Guess output device to choose reasonable mtu and hard_header_len */ 1239 ipgre_tunnel_bind_dev(dev);
1182 1240
1183 if (iph->daddr) { 1241 if (iph->daddr) {
1184 struct flowi fl = { .oif = tunnel->parms.link,
1185 .nl_u = { .ip4_u =
1186 { .daddr = iph->daddr,
1187 .saddr = iph->saddr,
1188 .tos = RT_TOS(iph->tos) } },
1189 .proto = IPPROTO_GRE };
1190 struct rtable *rt;
1191 if (!ip_route_output_key(&rt, &fl)) {
1192 tdev = rt->u.dst.dev;
1193 ip_rt_put(rt);
1194 }
1195
1196 dev->flags |= IFF_POINTOPOINT;
1197
1198#ifdef CONFIG_NET_IPGRE_BROADCAST 1242#ifdef CONFIG_NET_IPGRE_BROADCAST
1199 if (MULTICAST(iph->daddr)) { 1243 if (ipv4_is_multicast(iph->daddr)) {
1200 if (!iph->saddr) 1244 if (!iph->saddr)
1201 return -EINVAL; 1245 return -EINVAL;
1202 dev->flags = IFF_BROADCAST; 1246 dev->flags = IFF_BROADCAST;
@@ -1205,31 +1249,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
1205 dev->stop = ipgre_close; 1249 dev->stop = ipgre_close;
1206 } 1250 }
1207#endif 1251#endif
1208 } else { 1252 } else
1209 dev->header_ops = &ipgre_header_ops; 1253 dev->header_ops = &ipgre_header_ops;
1210 }
1211
1212 if (!tdev && tunnel->parms.link)
1213 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
1214
1215 if (tdev) {
1216 hlen = tdev->hard_header_len;
1217 mtu = tdev->mtu;
1218 }
1219 dev->iflink = tunnel->parms.link;
1220 1254
1221 /* Precalculate GRE options length */
1222 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1223 if (tunnel->parms.o_flags&GRE_CSUM)
1224 addend += 4;
1225 if (tunnel->parms.o_flags&GRE_KEY)
1226 addend += 4;
1227 if (tunnel->parms.o_flags&GRE_SEQ)
1228 addend += 4;
1229 }
1230 dev->hard_header_len = hlen + addend;
1231 dev->mtu = mtu - addend;
1232 tunnel->hlen = addend;
1233 return 0; 1255 return 0;
1234} 1256}
1235 1257
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 168c871fcd79..65631391d479 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -204,22 +204,14 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
204 204
205 rcu_read_lock(); 205 rcu_read_lock();
206 { 206 {
207 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
208 int protocol = ip_hdr(skb)->protocol; 207 int protocol = ip_hdr(skb)->protocol;
209 int hash; 208 int hash, raw;
210 struct sock *raw_sk;
211 struct net_protocol *ipprot; 209 struct net_protocol *ipprot;
212 210
213 resubmit: 211 resubmit:
214 hash = protocol & (MAX_INET_PROTOS - 1); 212 raw = raw_local_deliver(skb, protocol);
215 raw_sk = sk_head(&raw_v4_htable[hash]);
216
217 /* If there maybe a raw socket we must check - if not we
218 * don't care less
219 */
220 if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
221 raw_sk = NULL;
222 213
214 hash = protocol & (MAX_INET_PROTOS - 1);
223 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 215 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
224 int ret; 216 int ret;
225 217
@@ -237,7 +229,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
237 } 229 }
238 IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); 230 IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
239 } else { 231 } else {
240 if (!raw_sk) { 232 if (!raw) {
241 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 233 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
242 IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); 234 IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
243 icmp_send(skb, ICMP_DEST_UNREACH, 235 icmp_send(skb, ICMP_DEST_UNREACH,
@@ -268,7 +260,7 @@ int ip_local_deliver(struct sk_buff *skb)
268 return 0; 260 return 0;
269 } 261 }
270 262
271 return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL, 263 return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
272 ip_local_deliver_finish); 264 ip_local_deliver_finish);
273} 265}
274 266
@@ -347,7 +339,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
347 339
348#ifdef CONFIG_NET_CLS_ROUTE 340#ifdef CONFIG_NET_CLS_ROUTE
349 if (unlikely(skb->dst->tclassid)) { 341 if (unlikely(skb->dst->tclassid)) {
350 struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); 342 struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
351 u32 idx = skb->dst->tclassid; 343 u32 idx = skb->dst->tclassid;
352 st[idx&0xFF].o_packets++; 344 st[idx&0xFF].o_packets++;
353 st[idx&0xFF].o_bytes+=skb->len; 345 st[idx&0xFF].o_bytes+=skb->len;
@@ -442,7 +434,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
442 /* Remove any debris in the socket control block */ 434 /* Remove any debris in the socket control block */
443 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 435 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
444 436
445 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, 437 return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
446 ip_rcv_finish); 438 ip_rcv_finish);
447 439
448inhdr_error: 440inhdr_error:
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2f14745a9e1f..4d315158fd3c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -151,7 +151,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
151 __be32 addr; 151 __be32 addr;
152 152
153 memcpy(&addr, sptr+soffset-1, 4); 153 memcpy(&addr, sptr+soffset-1, 4);
154 if (inet_addr_type(addr) != RTN_LOCAL) { 154 if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
155 dopt->ts_needtime = 1; 155 dopt->ts_needtime = 1;
156 soffset += 8; 156 soffset += 8;
157 } 157 }
@@ -400,7 +400,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
400 { 400 {
401 __be32 addr; 401 __be32 addr;
402 memcpy(&addr, &optptr[optptr[2]-1], 4); 402 memcpy(&addr, &optptr[optptr[2]-1], 4);
403 if (inet_addr_type(addr) == RTN_UNICAST) 403 if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
404 break; 404 break;
405 if (skb) 405 if (skb)
406 timeptr = (__be32*)&optptr[optptr[2]+3]; 406 timeptr = (__be32*)&optptr[optptr[2]+3];
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5f7dc2de303..341779e685d9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -91,6 +91,28 @@ __inline__ void ip_send_check(struct iphdr *iph)
91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
92} 92}
93 93
94int __ip_local_out(struct sk_buff *skb)
95{
96 struct iphdr *iph = ip_hdr(skb);
97
98 iph->tot_len = htons(skb->len);
99 ip_send_check(iph);
100 return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
101 dst_output);
102}
103
104int ip_local_out(struct sk_buff *skb)
105{
106 int err;
107
108 err = __ip_local_out(skb);
109 if (likely(err == 1))
110 err = dst_output(skb);
111
112 return err;
113}
114EXPORT_SYMBOL_GPL(ip_local_out);
115
94/* dev_loopback_xmit for use with netfilter. */ 116/* dev_loopback_xmit for use with netfilter. */
95static int ip_dev_loopback_xmit(struct sk_buff *newskb) 117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
96{ 118{
@@ -138,20 +160,18 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
138 iph->daddr = rt->rt_dst; 160 iph->daddr = rt->rt_dst;
139 iph->saddr = rt->rt_src; 161 iph->saddr = rt->rt_src;
140 iph->protocol = sk->sk_protocol; 162 iph->protocol = sk->sk_protocol;
141 iph->tot_len = htons(skb->len);
142 ip_select_ident(iph, &rt->u.dst, sk); 163 ip_select_ident(iph, &rt->u.dst, sk);
143 164
144 if (opt && opt->optlen) { 165 if (opt && opt->optlen) {
145 iph->ihl += opt->optlen>>2; 166 iph->ihl += opt->optlen>>2;
146 ip_options_build(skb, opt, daddr, rt, 0); 167 ip_options_build(skb, opt, daddr, rt, 0);
147 } 168 }
148 ip_send_check(iph);
149 169
150 skb->priority = sk->sk_priority; 170 skb->priority = sk->sk_priority;
171 skb->mark = sk->sk_mark;
151 172
152 /* Send it out. */ 173 /* Send it out. */
153 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 174 return ip_local_out(skb);
154 dst_output);
155} 175}
156 176
157EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 177EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
@@ -251,8 +271,8 @@ int ip_mc_output(struct sk_buff *skb)
251 ) { 271 ) {
252 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 272 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
253 if (newskb) 273 if (newskb)
254 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL, 274 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
255 newskb->dev, 275 NULL, newskb->dev,
256 ip_dev_loopback_xmit); 276 ip_dev_loopback_xmit);
257 } 277 }
258 278
@@ -267,11 +287,11 @@ int ip_mc_output(struct sk_buff *skb)
267 if (rt->rt_flags&RTCF_BROADCAST) { 287 if (rt->rt_flags&RTCF_BROADCAST) {
268 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 288 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
269 if (newskb) 289 if (newskb)
270 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL, 290 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
271 newskb->dev, ip_dev_loopback_xmit); 291 newskb->dev, ip_dev_loopback_xmit);
272 } 292 }
273 293
274 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, 294 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
275 ip_finish_output, 295 ip_finish_output,
276 !(IPCB(skb)->flags & IPSKB_REROUTED)); 296 !(IPCB(skb)->flags & IPSKB_REROUTED));
277} 297}
@@ -285,7 +305,7 @@ int ip_output(struct sk_buff *skb)
285 skb->dev = dev; 305 skb->dev = dev;
286 skb->protocol = htons(ETH_P_IP); 306 skb->protocol = htons(ETH_P_IP);
287 307
288 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, 308 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
289 ip_finish_output, 309 ip_finish_output,
290 !(IPCB(skb)->flags & IPSKB_REROUTED)); 310 !(IPCB(skb)->flags & IPSKB_REROUTED));
291} 311}
@@ -331,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
331 * itself out. 351 * itself out.
332 */ 352 */
333 security_sk_classify_flow(sk, &fl); 353 security_sk_classify_flow(sk, &fl);
334 if (ip_route_output_flow(&rt, &fl, sk, 0)) 354 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0))
335 goto no_route; 355 goto no_route;
336 } 356 }
337 sk_setup_caps(sk, &rt->u.dst); 357 sk_setup_caps(sk, &rt->u.dst);
@@ -347,7 +367,6 @@ packet_routed:
347 skb_reset_network_header(skb); 367 skb_reset_network_header(skb);
348 iph = ip_hdr(skb); 368 iph = ip_hdr(skb);
349 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 369 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
350 iph->tot_len = htons(skb->len);
351 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) 370 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
352 iph->frag_off = htons(IP_DF); 371 iph->frag_off = htons(IP_DF);
353 else 372 else
@@ -366,13 +385,10 @@ packet_routed:
366 ip_select_ident_more(iph, &rt->u.dst, sk, 385 ip_select_ident_more(iph, &rt->u.dst, sk,
367 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 386 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
368 387
369 /* Add an IP checksum. */
370 ip_send_check(iph);
371
372 skb->priority = sk->sk_priority; 388 skb->priority = sk->sk_priority;
389 skb->mark = sk->sk_mark;
373 390
374 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 391 return ip_local_out(skb);
375 dst_output);
376 392
377no_route: 393no_route:
378 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 394 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
@@ -462,6 +478,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
462 if (skb_shinfo(skb)->frag_list) { 478 if (skb_shinfo(skb)->frag_list) {
463 struct sk_buff *frag; 479 struct sk_buff *frag;
464 int first_len = skb_pagelen(skb); 480 int first_len = skb_pagelen(skb);
481 int truesizes = 0;
465 482
466 if (first_len - hlen > mtu || 483 if (first_len - hlen > mtu ||
467 ((first_len - hlen) & 7) || 484 ((first_len - hlen) & 7) ||
@@ -485,7 +502,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
485 sock_hold(skb->sk); 502 sock_hold(skb->sk);
486 frag->sk = skb->sk; 503 frag->sk = skb->sk;
487 frag->destructor = sock_wfree; 504 frag->destructor = sock_wfree;
488 skb->truesize -= frag->truesize; 505 truesizes += frag->truesize;
489 } 506 }
490 } 507 }
491 508
@@ -496,6 +513,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
496 frag = skb_shinfo(skb)->frag_list; 513 frag = skb_shinfo(skb)->frag_list;
497 skb_shinfo(skb)->frag_list = NULL; 514 skb_shinfo(skb)->frag_list = NULL;
498 skb->data_len = first_len - skb_headlen(skb); 515 skb->data_len = first_len - skb_headlen(skb);
516 skb->truesize -= truesizes;
499 skb->len = first_len; 517 skb->len = first_len;
500 iph->tot_len = htons(first_len); 518 iph->tot_len = htons(first_len);
501 iph->frag_off = htons(IP_MF); 519 iph->frag_off = htons(IP_MF);
@@ -1016,8 +1034,6 @@ alloc_new_skb:
1016 1034
1017 skb_fill_page_desc(skb, i, page, 0, 0); 1035 skb_fill_page_desc(skb, i, page, 0, 0);
1018 frag = &skb_shinfo(skb)->frags[i]; 1036 frag = &skb_shinfo(skb)->frags[i];
1019 skb->truesize += PAGE_SIZE;
1020 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1021 } else { 1037 } else {
1022 err = -EMSGSIZE; 1038 err = -EMSGSIZE;
1023 goto error; 1039 goto error;
@@ -1030,6 +1046,8 @@ alloc_new_skb:
1030 frag->size += copy; 1046 frag->size += copy;
1031 skb->len += copy; 1047 skb->len += copy;
1032 skb->data_len += copy; 1048 skb->data_len += copy;
1049 skb->truesize += copy;
1050 atomic_add(copy, &sk->sk_wmem_alloc);
1033 } 1051 }
1034 offset += copy; 1052 offset += copy;
1035 length -= copy; 1053 length -= copy;
@@ -1172,6 +1190,8 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1172 1190
1173 skb->len += len; 1191 skb->len += len;
1174 skb->data_len += len; 1192 skb->data_len += len;
1193 skb->truesize += len;
1194 atomic_add(len, &sk->sk_wmem_alloc);
1175 offset += len; 1195 offset += len;
1176 size -= len; 1196 size -= len;
1177 } 1197 }
@@ -1183,6 +1203,17 @@ error:
1183 return err; 1203 return err;
1184} 1204}
1185 1205
1206static void ip_cork_release(struct inet_sock *inet)
1207{
1208 inet->cork.flags &= ~IPCORK_OPT;
1209 kfree(inet->cork.opt);
1210 inet->cork.opt = NULL;
1211 if (inet->cork.rt) {
1212 ip_rt_put(inet->cork.rt);
1213 inet->cork.rt = NULL;
1214 }
1215}
1216
1186/* 1217/*
1187 * Combined all pending IP fragments on the socket as one IP datagram 1218 * Combined all pending IP fragments on the socket as one IP datagram
1188 * and push them out. 1219 * and push them out.
@@ -1249,16 +1280,15 @@ int ip_push_pending_frames(struct sock *sk)
1249 ip_options_build(skb, opt, inet->cork.addr, rt, 0); 1280 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1250 } 1281 }
1251 iph->tos = inet->tos; 1282 iph->tos = inet->tos;
1252 iph->tot_len = htons(skb->len);
1253 iph->frag_off = df; 1283 iph->frag_off = df;
1254 ip_select_ident(iph, &rt->u.dst, sk); 1284 ip_select_ident(iph, &rt->u.dst, sk);
1255 iph->ttl = ttl; 1285 iph->ttl = ttl;
1256 iph->protocol = sk->sk_protocol; 1286 iph->protocol = sk->sk_protocol;
1257 iph->saddr = rt->rt_src; 1287 iph->saddr = rt->rt_src;
1258 iph->daddr = rt->rt_dst; 1288 iph->daddr = rt->rt_dst;
1259 ip_send_check(iph);
1260 1289
1261 skb->priority = sk->sk_priority; 1290 skb->priority = sk->sk_priority;
1291 skb->mark = sk->sk_mark;
1262 skb->dst = dst_clone(&rt->u.dst); 1292 skb->dst = dst_clone(&rt->u.dst);
1263 1293
1264 if (iph->protocol == IPPROTO_ICMP) 1294 if (iph->protocol == IPPROTO_ICMP)
@@ -1266,8 +1296,7 @@ int ip_push_pending_frames(struct sock *sk)
1266 skb_transport_header(skb))->type); 1296 skb_transport_header(skb))->type);
1267 1297
1268 /* Netfilter gets whole the not fragmented skb. */ 1298 /* Netfilter gets whole the not fragmented skb. */
1269 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 1299 err = ip_local_out(skb);
1270 skb->dst->dev, dst_output);
1271 if (err) { 1300 if (err) {
1272 if (err > 0) 1301 if (err > 0)
1273 err = inet->recverr ? net_xmit_errno(err) : 0; 1302 err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -1276,13 +1305,7 @@ int ip_push_pending_frames(struct sock *sk)
1276 } 1305 }
1277 1306
1278out: 1307out:
1279 inet->cork.flags &= ~IPCORK_OPT; 1308 ip_cork_release(inet);
1280 kfree(inet->cork.opt);
1281 inet->cork.opt = NULL;
1282 if (inet->cork.rt) {
1283 ip_rt_put(inet->cork.rt);
1284 inet->cork.rt = NULL;
1285 }
1286 return err; 1309 return err;
1287 1310
1288error: 1311error:
@@ -1295,19 +1318,12 @@ error:
1295 */ 1318 */
1296void ip_flush_pending_frames(struct sock *sk) 1319void ip_flush_pending_frames(struct sock *sk)
1297{ 1320{
1298 struct inet_sock *inet = inet_sk(sk);
1299 struct sk_buff *skb; 1321 struct sk_buff *skb;
1300 1322
1301 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) 1323 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1302 kfree_skb(skb); 1324 kfree_skb(skb);
1303 1325
1304 inet->cork.flags &= ~IPCORK_OPT; 1326 ip_cork_release(inet_sk(sk));
1305 kfree(inet->cork.opt);
1306 inet->cork.opt = NULL;
1307 if (inet->cork.rt) {
1308 ip_rt_put(inet->cork.rt);
1309 inet->cork.rt = NULL;
1310 }
1311} 1327}
1312 1328
1313 1329
@@ -1330,8 +1346,6 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1330 * 1346 *
1331 * Should run single threaded per socket because it uses the sock 1347 * Should run single threaded per socket because it uses the sock
1332 * structure to pass arguments. 1348 * structure to pass arguments.
1333 *
1334 * LATER: switch from ip_build_xmit to ip_append_*
1335 */ 1349 */
1336void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1350void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1337 unsigned int len) 1351 unsigned int len)
@@ -1370,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1370 .dport = tcp_hdr(skb)->source } }, 1384 .dport = tcp_hdr(skb)->source } },
1371 .proto = sk->sk_protocol }; 1385 .proto = sk->sk_protocol };
1372 security_skb_classify_flow(skb, &fl); 1386 security_skb_classify_flow(skb, &fl);
1373 if (ip_route_output_key(&rt, &fl)) 1387 if (ip_route_output_key(sk->sk_net, &rt, &fl))
1374 return; 1388 return;
1375 } 1389 }
1376 1390
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f51f20e487c8..754b0a5bbfe9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -437,10 +437,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
437 437
438 /* If optlen==0, it is equivalent to val == 0 */ 438 /* If optlen==0, it is equivalent to val == 0 */
439 439
440#ifdef CONFIG_IP_MROUTE 440 if (ip_mroute_opt(optname))
441 if (optname >= MRT_BASE && optname <= (MRT_BASE + 10))
442 return ip_mroute_setsockopt(sk,optname,optval,optlen); 441 return ip_mroute_setsockopt(sk,optname,optval,optlen);
443#endif
444 442
445 err = 0; 443 err = 0;
446 lock_sock(sk); 444 lock_sock(sk);
@@ -596,7 +594,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
596 err = 0; 594 err = 0;
597 break; 595 break;
598 } 596 }
599 dev = ip_dev_find(mreq.imr_address.s_addr); 597 dev = ip_dev_find(&init_net, mreq.imr_address.s_addr);
600 if (dev) { 598 if (dev) {
601 mreq.imr_ifindex = dev->ifindex; 599 mreq.imr_ifindex = dev->ifindex;
602 dev_put(dev); 600 dev_put(dev);
@@ -909,11 +907,9 @@ int ip_setsockopt(struct sock *sk, int level,
909#ifdef CONFIG_NETFILTER 907#ifdef CONFIG_NETFILTER
910 /* we need to exclude all possible ENOPROTOOPTs except default case */ 908 /* we need to exclude all possible ENOPROTOOPTs except default case */
911 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 909 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
912 optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY 910 optname != IP_IPSEC_POLICY &&
913#ifdef CONFIG_IP_MROUTE 911 optname != IP_XFRM_POLICY &&
914 && (optname < MRT_BASE || optname > (MRT_BASE + 10)) 912 !ip_mroute_opt(optname)) {
915#endif
916 ) {
917 lock_sock(sk); 913 lock_sock(sk);
918 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); 914 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
919 release_sock(sk); 915 release_sock(sk);
@@ -935,11 +931,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
935#ifdef CONFIG_NETFILTER 931#ifdef CONFIG_NETFILTER
936 /* we need to exclude all possible ENOPROTOOPTs except default case */ 932 /* we need to exclude all possible ENOPROTOOPTs except default case */
937 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 933 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
938 optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY 934 optname != IP_IPSEC_POLICY &&
939#ifdef CONFIG_IP_MROUTE 935 optname != IP_XFRM_POLICY &&
940 && (optname < MRT_BASE || optname > (MRT_BASE + 10)) 936 !ip_mroute_opt(optname)) {
941#endif
942 ) {
943 lock_sock(sk); 937 lock_sock(sk);
944 err = compat_nf_setsockopt(sk, PF_INET, optname, 938 err = compat_nf_setsockopt(sk, PF_INET, optname,
945 optval, optlen); 939 optval, optlen);
@@ -967,11 +961,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
967 if (level != SOL_IP) 961 if (level != SOL_IP)
968 return -EOPNOTSUPP; 962 return -EOPNOTSUPP;
969 963
970#ifdef CONFIG_IP_MROUTE 964 if (ip_mroute_opt(optname))
971 if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
972 return ip_mroute_getsockopt(sk,optname,optval,optlen); 965 return ip_mroute_getsockopt(sk,optname,optval,optlen);
973 }
974#endif
975 966
976 if (get_user(len,optlen)) 967 if (get_user(len,optlen))
977 return -EFAULT; 968 return -EFAULT;
@@ -1171,11 +1162,8 @@ int ip_getsockopt(struct sock *sk, int level,
1171 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1162 err = do_ip_getsockopt(sk, level, optname, optval, optlen);
1172#ifdef CONFIG_NETFILTER 1163#ifdef CONFIG_NETFILTER
1173 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1164 /* we need to exclude all possible ENOPROTOOPTs except default case */
1174 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS 1165 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1175#ifdef CONFIG_IP_MROUTE 1166 !ip_mroute_opt(optname)) {
1176 && (optname < MRT_BASE || optname > MRT_BASE+10)
1177#endif
1178 ) {
1179 int len; 1167 int len;
1180 1168
1181 if (get_user(len,optlen)) 1169 if (get_user(len,optlen))
@@ -1200,11 +1188,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1200 int err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1188 int err = do_ip_getsockopt(sk, level, optname, optval, optlen);
1201#ifdef CONFIG_NETFILTER 1189#ifdef CONFIG_NETFILTER
1202 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1190 /* we need to exclude all possible ENOPROTOOPTs except default case */
1203 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS 1191 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1204#ifdef CONFIG_IP_MROUTE 1192 !ip_mroute_opt(optname)) {
1205 && (optname < MRT_BASE || optname > MRT_BASE+10)
1206#endif
1207 ) {
1208 int len; 1193 int len;
1209 1194
1210 if (get_user(len, optlen)) 1195 if (get_user(len, optlen))
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 0bfeb02a5f87..ae1f45fc23b9 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -14,9 +14,9 @@
14 * - Adaptive compression. 14 * - Adaptive compression.
15 */ 15 */
16#include <linux/module.h> 16#include <linux/module.h>
17#include <asm/scatterlist.h>
18#include <asm/semaphore.h> 17#include <asm/semaphore.h>
19#include <linux/crypto.h> 18#include <linux/crypto.h>
19#include <linux/err.h>
20#include <linux/pfkeyv2.h> 20#include <linux/pfkeyv2.h>
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/smp.h> 22#include <linux/smp.h>
@@ -74,6 +74,7 @@ out:
74 74
75static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) 75static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
76{ 76{
77 int nexthdr;
77 int err = -ENOMEM; 78 int err = -ENOMEM;
78 struct ip_comp_hdr *ipch; 79 struct ip_comp_hdr *ipch;
79 80
@@ -84,13 +85,15 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
84 85
85 /* Remove ipcomp header and decompress original payload */ 86 /* Remove ipcomp header and decompress original payload */
86 ipch = (void *)skb->data; 87 ipch = (void *)skb->data;
88 nexthdr = ipch->nexthdr;
89
87 skb->transport_header = skb->network_header + sizeof(*ipch); 90 skb->transport_header = skb->network_header + sizeof(*ipch);
88 __skb_pull(skb, sizeof(*ipch)); 91 __skb_pull(skb, sizeof(*ipch));
89 err = ipcomp_decompress(x, skb); 92 err = ipcomp_decompress(x, skb);
90 if (err) 93 if (err)
91 goto out; 94 goto out;
92 95
93 err = ipch->nexthdr; 96 err = nexthdr;
94 97
95out: 98out:
96 return err; 99 return err;
@@ -182,7 +185,6 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
182static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) 185static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
183{ 186{
184 struct xfrm_state *t; 187 struct xfrm_state *t;
185 u8 mode = XFRM_MODE_TUNNEL;
186 188
187 t = xfrm_state_alloc(); 189 t = xfrm_state_alloc();
188 if (t == NULL) 190 if (t == NULL)
@@ -193,9 +195,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
193 t->id.daddr.a4 = x->id.daddr.a4; 195 t->id.daddr.a4 = x->id.daddr.a4;
194 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 196 memcpy(&t->sel, &x->sel, sizeof(t->sel));
195 t->props.family = AF_INET; 197 t->props.family = AF_INET;
196 if (x->props.mode == XFRM_MODE_BEET) 198 t->props.mode = x->props.mode;
197 mode = x->props.mode;
198 t->props.mode = mode;
199 t->props.saddr.a4 = x->props.saddr.a4; 199 t->props.saddr.a4 = x->props.saddr.a4;
200 t->props.flags = x->props.flags; 200 t->props.flags = x->props.flags;
201 201
@@ -345,7 +345,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
345 for_each_possible_cpu(cpu) { 345 for_each_possible_cpu(cpu) {
346 struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, 346 struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
347 CRYPTO_ALG_ASYNC); 347 CRYPTO_ALG_ASYNC);
348 if (!tfm) 348 if (IS_ERR(tfm))
349 goto error; 349 goto error;
350 *per_cpu_ptr(tfms, cpu) = tfm; 350 *per_cpu_ptr(tfms, cpu) = tfm;
351 } 351 }
@@ -389,15 +389,22 @@ static int ipcomp_init_state(struct xfrm_state *x)
389 if (x->encap) 389 if (x->encap)
390 goto out; 390 goto out;
391 391
392 x->props.header_len = 0;
393 switch (x->props.mode) {
394 case XFRM_MODE_TRANSPORT:
395 break;
396 case XFRM_MODE_TUNNEL:
397 x->props.header_len += sizeof(struct iphdr);
398 break;
399 default:
400 goto out;
401 }
402
392 err = -ENOMEM; 403 err = -ENOMEM;
393 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); 404 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
394 if (!ipcd) 405 if (!ipcd)
395 goto out; 406 goto out;
396 407
397 x->props.header_len = 0;
398 if (x->props.mode == XFRM_MODE_TUNNEL)
399 x->props.header_len += sizeof(struct iphdr);
400
401 mutex_lock(&ipcomp_resource_mutex); 408 mutex_lock(&ipcomp_resource_mutex);
402 if (!ipcomp_alloc_scratches()) 409 if (!ipcomp_alloc_scratches())
403 goto error; 410 goto error;
@@ -430,7 +437,7 @@ error:
430 goto out; 437 goto out;
431} 438}
432 439
433static struct xfrm_type ipcomp_type = { 440static const struct xfrm_type ipcomp_type = {
434 .description = "IPCOMP4", 441 .description = "IPCOMP4",
435 .owner = THIS_MODULE, 442 .owner = THIS_MODULE,
436 .proto = IPPROTO_COMP, 443 .proto = IPPROTO_COMP,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index c5c107a01823..a52b5853aaa8 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -140,6 +140,9 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
140__be32 root_server_addr = NONE; /* Address of NFS server */ 140__be32 root_server_addr = NONE; /* Address of NFS server */
141u8 root_server_path[256] = { 0, }; /* Path to mount as root */ 141u8 root_server_path[256] = { 0, }; /* Path to mount as root */
142 142
143/* vendor class identifier */
144static char vendor_class_identifier[253] __initdata;
145
143/* Persistent data: */ 146/* Persistent data: */
144 147
145static int ic_proto_used; /* Protocol used, if any */ 148static int ic_proto_used; /* Protocol used, if any */
@@ -299,7 +302,7 @@ static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
299 302
300 mm_segment_t oldfs = get_fs(); 303 mm_segment_t oldfs = get_fs();
301 set_fs(get_ds()); 304 set_fs(get_ds());
302 res = ip_rt_ioctl(cmd, (void __user *) arg); 305 res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
303 set_fs(oldfs); 306 set_fs(oldfs);
304 return res; 307 return res;
305} 308}
@@ -588,6 +591,7 @@ ic_dhcp_init_options(u8 *options)
588 u8 mt = ((ic_servaddr == NONE) 591 u8 mt = ((ic_servaddr == NONE)
589 ? DHCPDISCOVER : DHCPREQUEST); 592 ? DHCPDISCOVER : DHCPREQUEST);
590 u8 *e = options; 593 u8 *e = options;
594 int len;
591 595
592#ifdef IPCONFIG_DEBUG 596#ifdef IPCONFIG_DEBUG
593 printk("DHCP: Sending message type %d\n", mt); 597 printk("DHCP: Sending message type %d\n", mt);
@@ -628,6 +632,16 @@ ic_dhcp_init_options(u8 *options)
628 *e++ = sizeof(ic_req_params); 632 *e++ = sizeof(ic_req_params);
629 memcpy(e, ic_req_params, sizeof(ic_req_params)); 633 memcpy(e, ic_req_params, sizeof(ic_req_params));
630 e += sizeof(ic_req_params); 634 e += sizeof(ic_req_params);
635
636 if (*vendor_class_identifier) {
637 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
638 vendor_class_identifier);
639 *e++ = 60; /* Class-identifier */
640 len = strlen(vendor_class_identifier);
641 *e++ = len;
642 memcpy(e, vendor_class_identifier, len);
643 e += len;
644 }
631 } 645 }
632 646
633 *e++ = 255; /* End of the list */ 647 *e++ = 255; /* End of the list */
@@ -1396,31 +1410,16 @@ late_initcall(ip_auto_config);
1396 1410
1397/* 1411/*
1398 * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel 1412 * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
1399 * command line parameter. It consists of option fields separated by colons in 1413 * command line parameter. See Documentation/nfsroot.txt.
1400 * the following order:
1401 *
1402 * <client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<PROTO>
1403 *
1404 * Any of the fields can be empty which means to use a default value:
1405 * <client-ip> - address given by BOOTP or RARP
1406 * <server-ip> - address of host returning BOOTP or RARP packet
1407 * <gw-ip> - none, or the address returned by BOOTP
1408 * <netmask> - automatically determined from <client-ip>, or the
1409 * one returned by BOOTP
1410 * <host name> - <client-ip> in ASCII notation, or the name returned
1411 * by BOOTP
1412 * <device> - use all available devices
1413 * <PROTO>:
1414 * off|none - don't do autoconfig at all (DEFAULT)
1415 * on|any - use any configured protocol
1416 * dhcp|bootp|rarp - use only the specified protocol
1417 * both - use both BOOTP and RARP (not DHCP)
1418 */ 1414 */
1419static int __init ic_proto_name(char *name) 1415static int __init ic_proto_name(char *name)
1420{ 1416{
1421 if (!strcmp(name, "on") || !strcmp(name, "any")) { 1417 if (!strcmp(name, "on") || !strcmp(name, "any")) {
1422 return 1; 1418 return 1;
1423 } 1419 }
1420 if (!strcmp(name, "off") || !strcmp(name, "none")) {
1421 return 0;
1422 }
1424#ifdef CONFIG_IP_PNP_DHCP 1423#ifdef CONFIG_IP_PNP_DHCP
1425 else if (!strcmp(name, "dhcp")) { 1424 else if (!strcmp(name, "dhcp")) {
1426 ic_proto_enabled &= ~IC_RARP; 1425 ic_proto_enabled &= ~IC_RARP;
@@ -1454,17 +1453,24 @@ static int __init ip_auto_config_setup(char *addrs)
1454 int num = 0; 1453 int num = 0;
1455 1454
1456 ic_set_manually = 1; 1455 ic_set_manually = 1;
1456 ic_enable = 1;
1457 1457
1458 ic_enable = (*addrs && 1458 /*
1459 (strcmp(addrs, "off") != 0) && 1459 * If any dhcp, bootp etc options are set, leave autoconfig on
1460 (strcmp(addrs, "none") != 0)); 1460 * and skip the below static IP processing.
1461 if (!ic_enable) 1461 */
1462 if (ic_proto_name(addrs))
1462 return 1; 1463 return 1;
1463 1464
1464 if (ic_proto_name(addrs)) 1465 /* If no static IP is given, turn off autoconfig and bail. */
1466 if (*addrs == 0 ||
1467 strcmp(addrs, "off") == 0 ||
1468 strcmp(addrs, "none") == 0) {
1469 ic_enable = 0;
1465 return 1; 1470 return 1;
1471 }
1466 1472
1467 /* Parse the whole string */ 1473 /* Parse string for static IP assignment. */
1468 ip = addrs; 1474 ip = addrs;
1469 while (ip && *ip) { 1475 while (ip && *ip) {
1470 if ((cp = strchr(ip, ':'))) 1476 if ((cp = strchr(ip, ':')))
@@ -1502,7 +1508,10 @@ static int __init ip_auto_config_setup(char *addrs)
1502 strlcpy(user_dev_name, ip, sizeof(user_dev_name)); 1508 strlcpy(user_dev_name, ip, sizeof(user_dev_name));
1503 break; 1509 break;
1504 case 6: 1510 case 6:
1505 ic_proto_name(ip); 1511 if (ic_proto_name(ip) == 0 &&
1512 ic_myaddr == NONE) {
1513 ic_enable = 0;
1514 }
1506 break; 1515 break;
1507 } 1516 }
1508 } 1517 }
@@ -1518,5 +1527,16 @@ static int __init nfsaddrs_config_setup(char *addrs)
1518 return ip_auto_config_setup(addrs); 1527 return ip_auto_config_setup(addrs);
1519} 1528}
1520 1529
1530static int __init vendor_class_identifier_setup(char *addrs)
1531{
1532 if (strlcpy(vendor_class_identifier, addrs,
1533 sizeof(vendor_class_identifier))
1534 >= sizeof(vendor_class_identifier))
1535 printk(KERN_WARNING "DHCP: vendorclass too long, truncated to \"%s\"",
1536 vendor_class_identifier);
1537 return 1;
1538}
1539
1521__setup("ip=", ip_auto_config_setup); 1540__setup("ip=", ip_auto_config_setup);
1522__setup("nfsaddrs=", nfsaddrs_config_setup); 1541__setup("nfsaddrs=", nfsaddrs_config_setup);
1542__setup("dhcpclass=", vendor_class_identifier_setup);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8c2b2b0741da..da281581692c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -405,7 +405,7 @@ out:
405 fl.fl4_daddr = eiph->saddr; 405 fl.fl4_daddr = eiph->saddr;
406 fl.fl4_tos = RT_TOS(eiph->tos); 406 fl.fl4_tos = RT_TOS(eiph->tos);
407 fl.proto = IPPROTO_IPIP; 407 fl.proto = IPPROTO_IPIP;
408 if (ip_route_output_key(&rt, &key)) { 408 if (ip_route_output_key(&init_net, &rt, &key)) {
409 kfree_skb(skb2); 409 kfree_skb(skb2);
410 return 0; 410 return 0;
411 } 411 }
@@ -418,7 +418,7 @@ out:
418 fl.fl4_daddr = eiph->daddr; 418 fl.fl4_daddr = eiph->daddr;
419 fl.fl4_src = eiph->saddr; 419 fl.fl4_src = eiph->saddr;
420 fl.fl4_tos = eiph->tos; 420 fl.fl4_tos = eiph->tos;
421 if (ip_route_output_key(&rt, &fl) || 421 if (ip_route_output_key(&init_net, &rt, &fl) ||
422 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 422 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
423 ip_rt_put(rt); 423 ip_rt_put(rt);
424 kfree_skb(skb2); 424 kfree_skb(skb2);
@@ -547,7 +547,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
547 .saddr = tiph->saddr, 547 .saddr = tiph->saddr,
548 .tos = RT_TOS(tos) } }, 548 .tos = RT_TOS(tos) } },
549 .proto = IPPROTO_IPIP }; 549 .proto = IPPROTO_IPIP };
550 if (ip_route_output_key(&rt, &fl)) { 550 if (ip_route_output_key(&init_net, &rt, &fl)) {
551 tunnel->stat.tx_carrier_errors++; 551 tunnel->stat.tx_carrier_errors++;
552 goto tx_error_icmp; 552 goto tx_error_icmp;
553 } 553 }
@@ -651,6 +651,40 @@ tx_error:
651 return 0; 651 return 0;
652} 652}
653 653
654static void ipip_tunnel_bind_dev(struct net_device *dev)
655{
656 struct net_device *tdev = NULL;
657 struct ip_tunnel *tunnel;
658 struct iphdr *iph;
659
660 tunnel = netdev_priv(dev);
661 iph = &tunnel->parms.iph;
662
663 if (iph->daddr) {
664 struct flowi fl = { .oif = tunnel->parms.link,
665 .nl_u = { .ip4_u =
666 { .daddr = iph->daddr,
667 .saddr = iph->saddr,
668 .tos = RT_TOS(iph->tos) } },
669 .proto = IPPROTO_IPIP };
670 struct rtable *rt;
671 if (!ip_route_output_key(&init_net, &rt, &fl)) {
672 tdev = rt->u.dst.dev;
673 ip_rt_put(rt);
674 }
675 dev->flags |= IFF_POINTOPOINT;
676 }
677
678 if (!tdev && tunnel->parms.link)
679 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
680
681 if (tdev) {
682 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
683 dev->mtu = tdev->mtu - sizeof(struct iphdr);
684 }
685 dev->iflink = tunnel->parms.link;
686}
687
654static int 688static int
655ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 689ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
656{ 690{
@@ -723,6 +757,11 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
723 t->parms.iph.ttl = p.iph.ttl; 757 t->parms.iph.ttl = p.iph.ttl;
724 t->parms.iph.tos = p.iph.tos; 758 t->parms.iph.tos = p.iph.tos;
725 t->parms.iph.frag_off = p.iph.frag_off; 759 t->parms.iph.frag_off = p.iph.frag_off;
760 if (t->parms.link != p.link) {
761 t->parms.link = p.link;
762 ipip_tunnel_bind_dev(dev);
763 netdev_state_change(dev);
764 }
726 } 765 }
727 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 766 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
728 err = -EFAULT; 767 err = -EFAULT;
@@ -791,12 +830,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
791 830
792static int ipip_tunnel_init(struct net_device *dev) 831static int ipip_tunnel_init(struct net_device *dev)
793{ 832{
794 struct net_device *tdev = NULL;
795 struct ip_tunnel *tunnel; 833 struct ip_tunnel *tunnel;
796 struct iphdr *iph;
797 834
798 tunnel = netdev_priv(dev); 835 tunnel = netdev_priv(dev);
799 iph = &tunnel->parms.iph;
800 836
801 tunnel->dev = dev; 837 tunnel->dev = dev;
802 strcpy(tunnel->parms.name, dev->name); 838 strcpy(tunnel->parms.name, dev->name);
@@ -804,29 +840,7 @@ static int ipip_tunnel_init(struct net_device *dev)
804 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 840 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
805 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 841 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
806 842
807 if (iph->daddr) { 843 ipip_tunnel_bind_dev(dev);
808 struct flowi fl = { .oif = tunnel->parms.link,
809 .nl_u = { .ip4_u =
810 { .daddr = iph->daddr,
811 .saddr = iph->saddr,
812 .tos = RT_TOS(iph->tos) } },
813 .proto = IPPROTO_IPIP };
814 struct rtable *rt;
815 if (!ip_route_output_key(&rt, &fl)) {
816 tdev = rt->u.dst.dev;
817 ip_rt_put(rt);
818 }
819 dev->flags |= IFF_POINTOPOINT;
820 }
821
822 if (!tdev && tunnel->parms.link)
823 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
824
825 if (tdev) {
826 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
827 dev->mtu = tdev->mtu - sizeof(struct iphdr);
828 }
829 dev->iflink = tunnel->parms.link;
830 844
831 return 0; 845 return 0;
832} 846}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 37bb497d92af..a94f52c207a7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -141,7 +141,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
141 p.iph.ihl = 5; 141 p.iph.ihl = 5;
142 p.iph.protocol = IPPROTO_IPIP; 142 p.iph.protocol = IPPROTO_IPIP;
143 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 143 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
144 ifr.ifr_ifru.ifru_data = (void*)&p; 144 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
145 145
146 oldfs = get_fs(); set_fs(KERNEL_DS); 146 oldfs = get_fs(); set_fs(KERNEL_DS);
147 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 147 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -321,7 +321,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
321 e->error = -ETIMEDOUT; 321 e->error = -ETIMEDOUT;
322 memset(&e->msg, 0, sizeof(e->msg)); 322 memset(&e->msg, 0, sizeof(e->msg));
323 323
324 rtnl_unicast(skb, NETLINK_CB(skb).pid); 324 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
325 } else 325 } else
326 kfree_skb(skb); 326 kfree_skb(skb);
327 } 327 }
@@ -423,7 +423,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
423 return -ENOBUFS; 423 return -ENOBUFS;
424 break; 424 break;
425 case 0: 425 case 0:
426 dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr); 426 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
427 if (!dev) 427 if (!dev)
428 return -EADDRNOTAVAIL; 428 return -EADDRNOTAVAIL;
429 dev_put(dev); 429 dev_put(dev);
@@ -533,7 +533,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
533 memset(&e->msg, 0, sizeof(e->msg)); 533 memset(&e->msg, 0, sizeof(e->msg));
534 } 534 }
535 535
536 rtnl_unicast(skb, NETLINK_CB(skb).pid); 536 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
537 } else 537 } else
538 ip_mr_forward(skb, c, 0); 538 ip_mr_forward(skb, c, 0);
539 } 539 }
@@ -749,7 +749,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
749 return 0; 749 return 0;
750 } 750 }
751 751
752 if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) 752 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
753 return -EINVAL; 753 return -EINVAL;
754 754
755 c=ipmr_cache_alloc(); 755 c=ipmr_cache_alloc();
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
849{ 849{
850 rtnl_lock(); 850 rtnl_lock();
851 if (sk == mroute_socket) { 851 if (sk == mroute_socket) {
852 IPV4_DEVCONF_ALL(MC_FORWARDING)--; 852 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
853 853
854 write_lock_bh(&mrt_lock); 854 write_lock_bh(&mrt_lock);
855 mroute_socket=NULL; 855 mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
898 mroute_socket=sk; 898 mroute_socket=sk;
899 write_unlock_bh(&mrt_lock); 899 write_unlock_bh(&mrt_lock);
900 900
901 IPV4_DEVCONF_ALL(MC_FORWARDING)++; 901 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
902 } 902 }
903 rtnl_unlock(); 903 rtnl_unlock();
904 return ret; 904 return ret;
@@ -954,10 +954,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
954#ifdef CONFIG_IP_PIMSM 954#ifdef CONFIG_IP_PIMSM
955 case MRT_PIM: 955 case MRT_PIM:
956 { 956 {
957 int v, ret; 957 int v;
958
958 if (get_user(v,(int __user *)optval)) 959 if (get_user(v,(int __user *)optval))
959 return -EFAULT; 960 return -EFAULT;
960 v = (v)?1:0; 961 v = (v) ? 1 : 0;
962
961 rtnl_lock(); 963 rtnl_lock();
962 ret = 0; 964 ret = 0;
963 if (v != mroute_do_pim) { 965 if (v != mroute_do_pim) {
@@ -1183,7 +1185,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1183 .saddr = vif->local, 1185 .saddr = vif->local,
1184 .tos = RT_TOS(iph->tos) } }, 1186 .tos = RT_TOS(iph->tos) } },
1185 .proto = IPPROTO_IPIP }; 1187 .proto = IPPROTO_IPIP };
1186 if (ip_route_output_key(&rt, &fl)) 1188 if (ip_route_output_key(&init_net, &rt, &fl))
1187 goto out_free; 1189 goto out_free;
1188 encap = sizeof(struct iphdr); 1190 encap = sizeof(struct iphdr);
1189 } else { 1191 } else {
@@ -1192,7 +1194,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1192 { .daddr = iph->daddr, 1194 { .daddr = iph->daddr,
1193 .tos = RT_TOS(iph->tos) } }, 1195 .tos = RT_TOS(iph->tos) } },
1194 .proto = IPPROTO_IPIP }; 1196 .proto = IPPROTO_IPIP };
1195 if (ip_route_output_key(&rt, &fl)) 1197 if (ip_route_output_key(&init_net, &rt, &fl))
1196 goto out_free; 1198 goto out_free;
1197 } 1199 }
1198 1200
@@ -1245,7 +1247,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1245 * not mrouter) cannot join to more than one interface - it will 1247 * not mrouter) cannot join to more than one interface - it will
1246 * result in receiving multiple packets. 1248 * result in receiving multiple packets.
1247 */ 1249 */
1248 NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 1250 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1249 ipmr_forward_finish); 1251 ipmr_forward_finish);
1250 return; 1252 return;
1251 1253
@@ -1461,7 +1463,7 @@ int pim_rcv_v1(struct sk_buff * skb)
1461 b. packet is not a NULL-REGISTER 1463 b. packet is not a NULL-REGISTER
1462 c. packet is not truncated 1464 c. packet is not truncated
1463 */ 1465 */
1464 if (!MULTICAST(encap->daddr) || 1466 if (!ipv4_is_multicast(encap->daddr) ||
1465 encap->tot_len == 0 || 1467 encap->tot_len == 0 ||
1466 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1468 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1467 goto drop; 1469 goto drop;
@@ -1517,7 +1519,7 @@ static int pim_rcv(struct sk_buff * skb)
1517 /* check if the inner packet is destined to mcast group */ 1519 /* check if the inner packet is destined to mcast group */
1518 encap = (struct iphdr *)(skb_transport_header(skb) + 1520 encap = (struct iphdr *)(skb_transport_header(skb) +
1519 sizeof(struct pimreghdr)); 1521 sizeof(struct pimreghdr));
1520 if (!MULTICAST(encap->daddr) || 1522 if (!ipv4_is_multicast(encap->daddr) ||
1521 encap->tot_len == 0 || 1523 encap->tot_len == 0 ||
1522 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1524 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1523 goto drop; 1525 goto drop;
@@ -1659,6 +1661,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1659} 1661}
1660 1662
1661static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1663static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1664 __acquires(mrt_lock)
1662{ 1665{
1663 read_lock(&mrt_lock); 1666 read_lock(&mrt_lock);
1664 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 1667 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
@@ -1682,6 +1685,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1682} 1685}
1683 1686
1684static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 1687static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1688 __releases(mrt_lock)
1685{ 1689{
1686 read_unlock(&mrt_lock); 1690 read_unlock(&mrt_lock);
1687} 1691}
@@ -1889,8 +1893,7 @@ void __init ip_mr_init(void)
1889 sizeof(struct mfc_cache), 1893 sizeof(struct mfc_cache),
1890 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 1894 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1891 NULL); 1895 NULL);
1892 init_timer(&ipmr_expire_timer); 1896 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1893 ipmr_expire_timer.function=ipmr_expire_process;
1894 register_netdevice_notifier(&ip_mr_notifier); 1897 register_netdevice_notifier(&ip_mr_notifier);
1895#ifdef CONFIG_PROC_FS 1898#ifdef CONFIG_PROC_FS
1896 proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); 1899 proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 664cb8e97c1c..535abe0c45e7 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -51,18 +51,13 @@ static DEFINE_MUTEX(__ip_vs_app_mutex);
51 */ 51 */
52static inline int ip_vs_app_get(struct ip_vs_app *app) 52static inline int ip_vs_app_get(struct ip_vs_app *app)
53{ 53{
54 /* test and get the module atomically */ 54 return try_module_get(app->module);
55 if (app->module)
56 return try_module_get(app->module);
57 else
58 return 1;
59} 55}
60 56
61 57
62static inline void ip_vs_app_put(struct ip_vs_app *app) 58static inline void ip_vs_app_put(struct ip_vs_app *app)
63{ 59{
64 if (app->module) 60 module_put(app->module);
65 module_put(app->module);
66} 61}
67 62
68 63
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 4b702f708d30..65f1ba112752 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -393,7 +393,15 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
393 atomic_inc(&dest->refcnt); 393 atomic_inc(&dest->refcnt);
394 394
395 /* Bind with the destination and its corresponding transmitter */ 395 /* Bind with the destination and its corresponding transmitter */
396 cp->flags |= atomic_read(&dest->conn_flags); 396 if ((cp->flags & IP_VS_CONN_F_SYNC) &&
397 (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
398 /* if the connection is not template and is created
399 * by sync, preserve the activity flag.
400 */
401 cp->flags |= atomic_read(&dest->conn_flags) &
402 (~IP_VS_CONN_F_INACTIVE);
403 else
404 cp->flags |= atomic_read(&dest->conn_flags);
397 cp->dest = dest; 405 cp->dest = dest;
398 406
399 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 407 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
@@ -412,7 +420,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
412 /* It is a normal connection, so increase the inactive 420 /* It is a normal connection, so increase the inactive
413 connection counter because it is in TCP SYNRECV 421 connection counter because it is in TCP SYNRECV
414 state (inactive) or other protocol inacive state */ 422 state (inactive) or other protocol inacive state */
415 atomic_inc(&dest->inactconns); 423 if ((cp->flags & IP_VS_CONN_F_SYNC) &&
424 (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
425 atomic_inc(&dest->activeconns);
426 else
427 atomic_inc(&dest->inactconns);
416 } else { 428 } else {
417 /* It is a persistent connection/template, so increase 429 /* It is a persistent connection/template, so increase
418 the peristent connection counter */ 430 the peristent connection counter */
@@ -426,6 +438,24 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
426 438
427 439
428/* 440/*
441 * Check if there is a destination for the connection, if so
442 * bind the connection to the destination.
443 */
444struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
445{
446 struct ip_vs_dest *dest;
447
448 if ((cp) && (!cp->dest)) {
449 dest = ip_vs_find_dest(cp->daddr, cp->dport,
450 cp->vaddr, cp->vport, cp->protocol);
451 ip_vs_bind_dest(cp, dest);
452 return dest;
453 } else
454 return NULL;
455}
456
457
458/*
429 * Unbind a connection entry with its VS destination 459 * Unbind a connection entry with its VS destination
430 * Called by the ip_vs_conn_expire function. 460 * Called by the ip_vs_conn_expire function.
431 */ 461 */
@@ -611,9 +641,7 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
611 } 641 }
612 642
613 INIT_LIST_HEAD(&cp->c_list); 643 INIT_LIST_HEAD(&cp->c_list);
614 init_timer(&cp->timer); 644 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
615 cp->timer.data = (unsigned long)cp;
616 cp->timer.function = ip_vs_conn_expire;
617 cp->protocol = proto; 645 cp->protocol = proto;
618 cp->caddr = caddr; 646 cp->caddr = caddr;
619 cp->cport = cport; 647 cp->cport = cport;
@@ -765,6 +793,57 @@ static const struct file_operations ip_vs_conn_fops = {
765 .llseek = seq_lseek, 793 .llseek = seq_lseek,
766 .release = seq_release, 794 .release = seq_release,
767}; 795};
796
797static const char *ip_vs_origin_name(unsigned flags)
798{
799 if (flags & IP_VS_CONN_F_SYNC)
800 return "SYNC";
801 else
802 return "LOCAL";
803}
804
805static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
806{
807
808 if (v == SEQ_START_TOKEN)
809 seq_puts(seq,
810 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
811 else {
812 const struct ip_vs_conn *cp = v;
813
814 seq_printf(seq,
815 "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
816 ip_vs_proto_name(cp->protocol),
817 ntohl(cp->caddr), ntohs(cp->cport),
818 ntohl(cp->vaddr), ntohs(cp->vport),
819 ntohl(cp->daddr), ntohs(cp->dport),
820 ip_vs_state_name(cp->protocol, cp->state),
821 ip_vs_origin_name(cp->flags),
822 (cp->timer.expires-jiffies)/HZ);
823 }
824 return 0;
825}
826
827static const struct seq_operations ip_vs_conn_sync_seq_ops = {
828 .start = ip_vs_conn_seq_start,
829 .next = ip_vs_conn_seq_next,
830 .stop = ip_vs_conn_seq_stop,
831 .show = ip_vs_conn_sync_seq_show,
832};
833
834static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
835{
836 return seq_open(file, &ip_vs_conn_sync_seq_ops);
837}
838
839static const struct file_operations ip_vs_conn_sync_fops = {
840 .owner = THIS_MODULE,
841 .open = ip_vs_conn_sync_open,
842 .read = seq_read,
843 .llseek = seq_lseek,
844 .release = seq_release,
845};
846
768#endif 847#endif
769 848
770 849
@@ -924,6 +1003,7 @@ int ip_vs_conn_init(void)
924 } 1003 }
925 1004
926 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); 1005 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
1006 proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
927 1007
928 /* calculate the random value for connection hash */ 1008 /* calculate the random value for connection hash */
929 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); 1009 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -940,5 +1020,6 @@ void ip_vs_conn_cleanup(void)
940 /* Release the empty cache */ 1020 /* Release the empty cache */
941 kmem_cache_destroy(ip_vs_conn_cachep); 1021 kmem_cache_destroy(ip_vs_conn_cachep);
942 proc_net_remove(&init_net, "ip_vs_conn"); 1022 proc_net_remove(&init_net, "ip_vs_conn");
1023 proc_net_remove(&init_net, "ip_vs_conn_sync");
943 vfree(ip_vs_conn_tab); 1024 vfree(ip_vs_conn_tab);
944} 1025}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index c6ed7654e839..963981a9d501 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -423,7 +423,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
423 and the destination is RTN_UNICAST (and not local), then create 423 and the destination is RTN_UNICAST (and not local), then create
424 a cache_bypass connection entry */ 424 a cache_bypass connection entry */
425 if (sysctl_ip_vs_cache_bypass && svc->fwmark 425 if (sysctl_ip_vs_cache_bypass && svc->fwmark
426 && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { 426 && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
427 int ret, cs; 427 int ret, cs;
428 struct ip_vs_conn *cp; 428 struct ip_vs_conn *cp;
429 429
@@ -481,7 +481,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
481 481
482 482
483/* 483/*
484 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING 484 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
485 * chain, and is used for VS/NAT. 485 * chain, and is used for VS/NAT.
486 * It detects packets for VS/NAT connections and sends the packets 486 * It detects packets for VS/NAT connections and sends the packets
487 * immediately. This can avoid that iptable_nat mangles the packets 487 * immediately. This can avoid that iptable_nat mangles the packets
@@ -637,7 +637,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
637 verdict = NF_DROP; 637 verdict = NF_DROP;
638 638
639 if (IP_VS_FWD_METHOD(cp) != 0) { 639 if (IP_VS_FWD_METHOD(cp) != 0) {
640 IP_VS_ERR("shouldn't reach here, because the box is on the" 640 IP_VS_ERR("shouldn't reach here, because the box is on the "
641 "half connection in the tun/dr module.\n"); 641 "half connection in the tun/dr module.\n");
642 } 642 }
643 643
@@ -679,7 +679,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
679} 679}
680 680
681/* 681/*
682 * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. 682 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
683 * Check if outgoing packet belongs to the established ip_vs_conn, 683 * Check if outgoing packet belongs to the established ip_vs_conn,
684 * rewrite addresses of the packet and send it on its way... 684 * rewrite addresses of the packet and send it on its way...
685 */ 685 */
@@ -814,7 +814,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
814 814
815 /* reassemble IP fragments */ 815 /* reassemble IP fragments */
816 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 816 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
817 if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? 817 if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
818 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) 818 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
819 return NF_STOLEN; 819 return NF_STOLEN;
820 } 820 }
@@ -979,15 +979,23 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
979 ret = NF_ACCEPT; 979 ret = NF_ACCEPT;
980 } 980 }
981 981
982 /* increase its packet counter and check if it is needed 982 /* Increase its packet counter and check if it is needed
983 to be synchronized */ 983 * to be synchronized
984 *
985 * Sync connection if it is about to close to
986 * encorage the standby servers to update the connections timeout
987 */
984 atomic_inc(&cp->in_pkts); 988 atomic_inc(&cp->in_pkts);
985 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && 989 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
986 (cp->protocol != IPPROTO_TCP || 990 (((cp->protocol != IPPROTO_TCP ||
987 cp->state == IP_VS_TCP_S_ESTABLISHED) && 991 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
988 (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] 992 (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
989 == sysctl_ip_vs_sync_threshold[0])) 993 == sysctl_ip_vs_sync_threshold[0])) ||
994 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
995 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
996 (cp->state == IP_VS_TCP_S_CLOSE)))))
990 ip_vs_sync_conn(cp); 997 ip_vs_sync_conn(cp);
998 cp->old_state = cp->state;
991 999
992 ip_vs_conn_put(cp); 1000 ip_vs_conn_put(cp);
993 return ret; 1001 return ret;
@@ -995,12 +1003,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
995 1003
996 1004
997/* 1005/*
998 * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP 1006 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
999 * related packets destined for 0.0.0.0/0. 1007 * related packets destined for 0.0.0.0/0.
1000 * When fwmark-based virtual service is used, such as transparent 1008 * When fwmark-based virtual service is used, such as transparent
1001 * cache cluster, TCP packets can be marked and routed to ip_vs_in, 1009 * cache cluster, TCP packets can be marked and routed to ip_vs_in,
1002 * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and 1010 * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
1003 * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain 1011 * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
1004 * and send them to ip_vs_in_icmp. 1012 * and send them to ip_vs_in_icmp.
1005 */ 1013 */
1006static unsigned int 1014static unsigned int
@@ -1017,43 +1025,42 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
1017} 1025}
1018 1026
1019 1027
1020/* After packet filtering, forward packet through VS/DR, VS/TUN, 1028static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1021 or VS/NAT(change destination), so that filtering rules can be 1029 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1022 applied to IPVS. */ 1030 * or VS/NAT(change destination), so that filtering rules can be
1023static struct nf_hook_ops ip_vs_in_ops = { 1031 * applied to IPVS. */
1024 .hook = ip_vs_in, 1032 {
1025 .owner = THIS_MODULE, 1033 .hook = ip_vs_in,
1026 .pf = PF_INET, 1034 .owner = THIS_MODULE,
1027 .hooknum = NF_IP_LOCAL_IN, 1035 .pf = PF_INET,
1028 .priority = 100, 1036 .hooknum = NF_INET_LOCAL_IN,
1029}; 1037 .priority = 100,
1030 1038 },
1031/* After packet filtering, change source only for VS/NAT */ 1039 /* After packet filtering, change source only for VS/NAT */
1032static struct nf_hook_ops ip_vs_out_ops = { 1040 {
1033 .hook = ip_vs_out, 1041 .hook = ip_vs_out,
1034 .owner = THIS_MODULE, 1042 .owner = THIS_MODULE,
1035 .pf = PF_INET, 1043 .pf = PF_INET,
1036 .hooknum = NF_IP_FORWARD, 1044 .hooknum = NF_INET_FORWARD,
1037 .priority = 100, 1045 .priority = 100,
1038}; 1046 },
1039 1047 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1040/* After packet filtering (but before ip_vs_out_icmp), catch icmp 1048 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
1041 destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1049 {
1042static struct nf_hook_ops ip_vs_forward_icmp_ops = { 1050 .hook = ip_vs_forward_icmp,
1043 .hook = ip_vs_forward_icmp, 1051 .owner = THIS_MODULE,
1044 .owner = THIS_MODULE, 1052 .pf = PF_INET,
1045 .pf = PF_INET, 1053 .hooknum = NF_INET_FORWARD,
1046 .hooknum = NF_IP_FORWARD, 1054 .priority = 99,
1047 .priority = 99, 1055 },
1048}; 1056 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1049 1057 {
1050/* Before the netfilter connection tracking, exit from POST_ROUTING */ 1058 .hook = ip_vs_post_routing,
1051static struct nf_hook_ops ip_vs_post_routing_ops = { 1059 .owner = THIS_MODULE,
1052 .hook = ip_vs_post_routing, 1060 .pf = PF_INET,
1053 .owner = THIS_MODULE, 1061 .hooknum = NF_INET_POST_ROUTING,
1054 .pf = PF_INET, 1062 .priority = NF_IP_PRI_NAT_SRC-1,
1055 .hooknum = NF_IP_POST_ROUTING, 1063 },
1056 .priority = NF_IP_PRI_NAT_SRC-1,
1057}; 1064};
1058 1065
1059 1066
@@ -1084,37 +1091,15 @@ static int __init ip_vs_init(void)
1084 goto cleanup_app; 1091 goto cleanup_app;
1085 } 1092 }
1086 1093
1087 ret = nf_register_hook(&ip_vs_in_ops); 1094 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1088 if (ret < 0) { 1095 if (ret < 0) {
1089 IP_VS_ERR("can't register in hook.\n"); 1096 IP_VS_ERR("can't register hooks.\n");
1090 goto cleanup_conn; 1097 goto cleanup_conn;
1091 } 1098 }
1092 1099
1093 ret = nf_register_hook(&ip_vs_out_ops);
1094 if (ret < 0) {
1095 IP_VS_ERR("can't register out hook.\n");
1096 goto cleanup_inops;
1097 }
1098 ret = nf_register_hook(&ip_vs_post_routing_ops);
1099 if (ret < 0) {
1100 IP_VS_ERR("can't register post_routing hook.\n");
1101 goto cleanup_outops;
1102 }
1103 ret = nf_register_hook(&ip_vs_forward_icmp_ops);
1104 if (ret < 0) {
1105 IP_VS_ERR("can't register forward_icmp hook.\n");
1106 goto cleanup_postroutingops;
1107 }
1108
1109 IP_VS_INFO("ipvs loaded.\n"); 1100 IP_VS_INFO("ipvs loaded.\n");
1110 return ret; 1101 return ret;
1111 1102
1112 cleanup_postroutingops:
1113 nf_unregister_hook(&ip_vs_post_routing_ops);
1114 cleanup_outops:
1115 nf_unregister_hook(&ip_vs_out_ops);
1116 cleanup_inops:
1117 nf_unregister_hook(&ip_vs_in_ops);
1118 cleanup_conn: 1103 cleanup_conn:
1119 ip_vs_conn_cleanup(); 1104 ip_vs_conn_cleanup();
1120 cleanup_app: 1105 cleanup_app:
@@ -1128,10 +1113,7 @@ static int __init ip_vs_init(void)
1128 1113
1129static void __exit ip_vs_cleanup(void) 1114static void __exit ip_vs_cleanup(void)
1130{ 1115{
1131 nf_unregister_hook(&ip_vs_forward_icmp_ops); 1116 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1132 nf_unregister_hook(&ip_vs_post_routing_ops);
1133 nf_unregister_hook(&ip_vs_out_ops);
1134 nf_unregister_hook(&ip_vs_in_ops);
1135 ip_vs_conn_cleanup(); 1117 ip_vs_conn_cleanup();
1136 ip_vs_app_cleanup(); 1118 ip_vs_app_cleanup();
1137 ip_vs_protocol_cleanup(); 1119 ip_vs_protocol_cleanup();
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 7345fc252a23..94c5767c8e01 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -579,6 +579,31 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
579 return NULL; 579 return NULL;
580} 580}
581 581
582/*
583 * Find destination by {daddr,dport,vaddr,protocol}
584 * Cretaed to be used in ip_vs_process_message() in
585 * the backup synchronization daemon. It finds the
586 * destination to be bound to the received connection
587 * on the backup.
588 *
589 * ip_vs_lookup_real_service() looked promissing, but
590 * seems not working as expected.
591 */
592struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
593 __be32 vaddr, __be16 vport, __u16 protocol)
594{
595 struct ip_vs_dest *dest;
596 struct ip_vs_service *svc;
597
598 svc = ip_vs_service_get(0, protocol, vaddr, vport);
599 if (!svc)
600 return NULL;
601 dest = ip_vs_lookup_dest(svc, daddr, dport);
602 if (dest)
603 atomic_inc(&dest->refcnt);
604 ip_vs_service_put(svc);
605 return dest;
606}
582 607
583/* 608/*
584 * Lookup dest by {svc,addr,port} in the destination trash. 609 * Lookup dest by {svc,addr,port} in the destination trash.
@@ -679,7 +704,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
679 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 704 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
680 705
681 /* check if local node and update the flags */ 706 /* check if local node and update the flags */
682 if (inet_addr_type(udest->addr) == RTN_LOCAL) { 707 if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
683 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) 708 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
684 | IP_VS_CONN_F_LOCALNODE; 709 | IP_VS_CONN_F_LOCALNODE;
685 } 710 }
@@ -731,7 +756,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
731 756
732 EnterFunction(2); 757 EnterFunction(2);
733 758
734 atype = inet_addr_type(udest->addr); 759 atype = inet_addr_type(&init_net, udest->addr);
735 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 760 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
736 return -EINVAL; 761 return -EINVAL;
737 762
@@ -1399,7 +1424,6 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1399 1424
1400static struct ctl_table vs_vars[] = { 1425static struct ctl_table vs_vars[] = {
1401 { 1426 {
1402 .ctl_name = NET_IPV4_VS_AMEMTHRESH,
1403 .procname = "amemthresh", 1427 .procname = "amemthresh",
1404 .data = &sysctl_ip_vs_amemthresh, 1428 .data = &sysctl_ip_vs_amemthresh,
1405 .maxlen = sizeof(int), 1429 .maxlen = sizeof(int),
@@ -1408,7 +1432,6 @@ static struct ctl_table vs_vars[] = {
1408 }, 1432 },
1409#ifdef CONFIG_IP_VS_DEBUG 1433#ifdef CONFIG_IP_VS_DEBUG
1410 { 1434 {
1411 .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
1412 .procname = "debug_level", 1435 .procname = "debug_level",
1413 .data = &sysctl_ip_vs_debug_level, 1436 .data = &sysctl_ip_vs_debug_level,
1414 .maxlen = sizeof(int), 1437 .maxlen = sizeof(int),
@@ -1417,7 +1440,6 @@ static struct ctl_table vs_vars[] = {
1417 }, 1440 },
1418#endif 1441#endif
1419 { 1442 {
1420 .ctl_name = NET_IPV4_VS_AMDROPRATE,
1421 .procname = "am_droprate", 1443 .procname = "am_droprate",
1422 .data = &sysctl_ip_vs_am_droprate, 1444 .data = &sysctl_ip_vs_am_droprate,
1423 .maxlen = sizeof(int), 1445 .maxlen = sizeof(int),
@@ -1425,7 +1447,6 @@ static struct ctl_table vs_vars[] = {
1425 .proc_handler = &proc_dointvec, 1447 .proc_handler = &proc_dointvec,
1426 }, 1448 },
1427 { 1449 {
1428 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
1429 .procname = "drop_entry", 1450 .procname = "drop_entry",
1430 .data = &sysctl_ip_vs_drop_entry, 1451 .data = &sysctl_ip_vs_drop_entry,
1431 .maxlen = sizeof(int), 1452 .maxlen = sizeof(int),
@@ -1433,7 +1454,6 @@ static struct ctl_table vs_vars[] = {
1433 .proc_handler = &proc_do_defense_mode, 1454 .proc_handler = &proc_do_defense_mode,
1434 }, 1455 },
1435 { 1456 {
1436 .ctl_name = NET_IPV4_VS_DROP_PACKET,
1437 .procname = "drop_packet", 1457 .procname = "drop_packet",
1438 .data = &sysctl_ip_vs_drop_packet, 1458 .data = &sysctl_ip_vs_drop_packet,
1439 .maxlen = sizeof(int), 1459 .maxlen = sizeof(int),
@@ -1441,7 +1461,6 @@ static struct ctl_table vs_vars[] = {
1441 .proc_handler = &proc_do_defense_mode, 1461 .proc_handler = &proc_do_defense_mode,
1442 }, 1462 },
1443 { 1463 {
1444 .ctl_name = NET_IPV4_VS_SECURE_TCP,
1445 .procname = "secure_tcp", 1464 .procname = "secure_tcp",
1446 .data = &sysctl_ip_vs_secure_tcp, 1465 .data = &sysctl_ip_vs_secure_tcp,
1447 .maxlen = sizeof(int), 1466 .maxlen = sizeof(int),
@@ -1450,7 +1469,6 @@ static struct ctl_table vs_vars[] = {
1450 }, 1469 },
1451#if 0 1470#if 0
1452 { 1471 {
1453 .ctl_name = NET_IPV4_VS_TO_ES,
1454 .procname = "timeout_established", 1472 .procname = "timeout_established",
1455 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], 1473 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1456 .maxlen = sizeof(int), 1474 .maxlen = sizeof(int),
@@ -1458,7 +1476,6 @@ static struct ctl_table vs_vars[] = {
1458 .proc_handler = &proc_dointvec_jiffies, 1476 .proc_handler = &proc_dointvec_jiffies,
1459 }, 1477 },
1460 { 1478 {
1461 .ctl_name = NET_IPV4_VS_TO_SS,
1462 .procname = "timeout_synsent", 1479 .procname = "timeout_synsent",
1463 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], 1480 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1464 .maxlen = sizeof(int), 1481 .maxlen = sizeof(int),
@@ -1466,7 +1483,6 @@ static struct ctl_table vs_vars[] = {
1466 .proc_handler = &proc_dointvec_jiffies, 1483 .proc_handler = &proc_dointvec_jiffies,
1467 }, 1484 },
1468 { 1485 {
1469 .ctl_name = NET_IPV4_VS_TO_SR,
1470 .procname = "timeout_synrecv", 1486 .procname = "timeout_synrecv",
1471 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], 1487 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1472 .maxlen = sizeof(int), 1488 .maxlen = sizeof(int),
@@ -1474,7 +1490,6 @@ static struct ctl_table vs_vars[] = {
1474 .proc_handler = &proc_dointvec_jiffies, 1490 .proc_handler = &proc_dointvec_jiffies,
1475 }, 1491 },
1476 { 1492 {
1477 .ctl_name = NET_IPV4_VS_TO_FW,
1478 .procname = "timeout_finwait", 1493 .procname = "timeout_finwait",
1479 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], 1494 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1480 .maxlen = sizeof(int), 1495 .maxlen = sizeof(int),
@@ -1482,7 +1497,6 @@ static struct ctl_table vs_vars[] = {
1482 .proc_handler = &proc_dointvec_jiffies, 1497 .proc_handler = &proc_dointvec_jiffies,
1483 }, 1498 },
1484 { 1499 {
1485 .ctl_name = NET_IPV4_VS_TO_TW,
1486 .procname = "timeout_timewait", 1500 .procname = "timeout_timewait",
1487 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], 1501 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1488 .maxlen = sizeof(int), 1502 .maxlen = sizeof(int),
@@ -1490,7 +1504,6 @@ static struct ctl_table vs_vars[] = {
1490 .proc_handler = &proc_dointvec_jiffies, 1504 .proc_handler = &proc_dointvec_jiffies,
1491 }, 1505 },
1492 { 1506 {
1493 .ctl_name = NET_IPV4_VS_TO_CL,
1494 .procname = "timeout_close", 1507 .procname = "timeout_close",
1495 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], 1508 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1496 .maxlen = sizeof(int), 1509 .maxlen = sizeof(int),
@@ -1498,7 +1511,6 @@ static struct ctl_table vs_vars[] = {
1498 .proc_handler = &proc_dointvec_jiffies, 1511 .proc_handler = &proc_dointvec_jiffies,
1499 }, 1512 },
1500 { 1513 {
1501 .ctl_name = NET_IPV4_VS_TO_CW,
1502 .procname = "timeout_closewait", 1514 .procname = "timeout_closewait",
1503 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], 1515 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1504 .maxlen = sizeof(int), 1516 .maxlen = sizeof(int),
@@ -1506,7 +1518,6 @@ static struct ctl_table vs_vars[] = {
1506 .proc_handler = &proc_dointvec_jiffies, 1518 .proc_handler = &proc_dointvec_jiffies,
1507 }, 1519 },
1508 { 1520 {
1509 .ctl_name = NET_IPV4_VS_TO_LA,
1510 .procname = "timeout_lastack", 1521 .procname = "timeout_lastack",
1511 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], 1522 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1512 .maxlen = sizeof(int), 1523 .maxlen = sizeof(int),
@@ -1514,7 +1525,6 @@ static struct ctl_table vs_vars[] = {
1514 .proc_handler = &proc_dointvec_jiffies, 1525 .proc_handler = &proc_dointvec_jiffies,
1515 }, 1526 },
1516 { 1527 {
1517 .ctl_name = NET_IPV4_VS_TO_LI,
1518 .procname = "timeout_listen", 1528 .procname = "timeout_listen",
1519 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], 1529 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1520 .maxlen = sizeof(int), 1530 .maxlen = sizeof(int),
@@ -1522,7 +1532,6 @@ static struct ctl_table vs_vars[] = {
1522 .proc_handler = &proc_dointvec_jiffies, 1532 .proc_handler = &proc_dointvec_jiffies,
1523 }, 1533 },
1524 { 1534 {
1525 .ctl_name = NET_IPV4_VS_TO_SA,
1526 .procname = "timeout_synack", 1535 .procname = "timeout_synack",
1527 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], 1536 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1528 .maxlen = sizeof(int), 1537 .maxlen = sizeof(int),
@@ -1530,7 +1539,6 @@ static struct ctl_table vs_vars[] = {
1530 .proc_handler = &proc_dointvec_jiffies, 1539 .proc_handler = &proc_dointvec_jiffies,
1531 }, 1540 },
1532 { 1541 {
1533 .ctl_name = NET_IPV4_VS_TO_UDP,
1534 .procname = "timeout_udp", 1542 .procname = "timeout_udp",
1535 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], 1543 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1536 .maxlen = sizeof(int), 1544 .maxlen = sizeof(int),
@@ -1538,7 +1546,6 @@ static struct ctl_table vs_vars[] = {
1538 .proc_handler = &proc_dointvec_jiffies, 1546 .proc_handler = &proc_dointvec_jiffies,
1539 }, 1547 },
1540 { 1548 {
1541 .ctl_name = NET_IPV4_VS_TO_ICMP,
1542 .procname = "timeout_icmp", 1549 .procname = "timeout_icmp",
1543 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], 1550 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1544 .maxlen = sizeof(int), 1551 .maxlen = sizeof(int),
@@ -1547,7 +1554,6 @@ static struct ctl_table vs_vars[] = {
1547 }, 1554 },
1548#endif 1555#endif
1549 { 1556 {
1550 .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
1551 .procname = "cache_bypass", 1557 .procname = "cache_bypass",
1552 .data = &sysctl_ip_vs_cache_bypass, 1558 .data = &sysctl_ip_vs_cache_bypass,
1553 .maxlen = sizeof(int), 1559 .maxlen = sizeof(int),
@@ -1555,7 +1561,6 @@ static struct ctl_table vs_vars[] = {
1555 .proc_handler = &proc_dointvec, 1561 .proc_handler = &proc_dointvec,
1556 }, 1562 },
1557 { 1563 {
1558 .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
1559 .procname = "expire_nodest_conn", 1564 .procname = "expire_nodest_conn",
1560 .data = &sysctl_ip_vs_expire_nodest_conn, 1565 .data = &sysctl_ip_vs_expire_nodest_conn,
1561 .maxlen = sizeof(int), 1566 .maxlen = sizeof(int),
@@ -1563,7 +1568,6 @@ static struct ctl_table vs_vars[] = {
1563 .proc_handler = &proc_dointvec, 1568 .proc_handler = &proc_dointvec,
1564 }, 1569 },
1565 { 1570 {
1566 .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
1567 .procname = "expire_quiescent_template", 1571 .procname = "expire_quiescent_template",
1568 .data = &sysctl_ip_vs_expire_quiescent_template, 1572 .data = &sysctl_ip_vs_expire_quiescent_template,
1569 .maxlen = sizeof(int), 1573 .maxlen = sizeof(int),
@@ -1571,7 +1575,6 @@ static struct ctl_table vs_vars[] = {
1571 .proc_handler = &proc_dointvec, 1575 .proc_handler = &proc_dointvec,
1572 }, 1576 },
1573 { 1577 {
1574 .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
1575 .procname = "sync_threshold", 1578 .procname = "sync_threshold",
1576 .data = &sysctl_ip_vs_sync_threshold, 1579 .data = &sysctl_ip_vs_sync_threshold,
1577 .maxlen = sizeof(sysctl_ip_vs_sync_threshold), 1580 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
@@ -1579,7 +1582,6 @@ static struct ctl_table vs_vars[] = {
1579 .proc_handler = &proc_do_sync_threshold, 1582 .proc_handler = &proc_do_sync_threshold,
1580 }, 1583 },
1581 { 1584 {
1582 .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
1583 .procname = "nat_icmp_send", 1585 .procname = "nat_icmp_send",
1584 .data = &sysctl_ip_vs_nat_icmp_send, 1586 .data = &sysctl_ip_vs_nat_icmp_send,
1585 .maxlen = sizeof(int), 1587 .maxlen = sizeof(int),
@@ -1589,35 +1591,13 @@ static struct ctl_table vs_vars[] = {
1589 { .ctl_name = 0 } 1591 { .ctl_name = 0 }
1590}; 1592};
1591 1593
1592static ctl_table vs_table[] = { 1594struct ctl_path net_vs_ctl_path[] = {
1593 { 1595 { .procname = "net", .ctl_name = CTL_NET, },
1594 .ctl_name = NET_IPV4_VS, 1596 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1595 .procname = "vs", 1597 { .procname = "vs", },
1596 .mode = 0555, 1598 { }
1597 .child = vs_vars
1598 },
1599 { .ctl_name = 0 }
1600};
1601
1602static ctl_table ipvs_ipv4_table[] = {
1603 {
1604 .ctl_name = NET_IPV4,
1605 .procname = "ipv4",
1606 .mode = 0555,
1607 .child = vs_table,
1608 },
1609 { .ctl_name = 0 }
1610};
1611
1612static ctl_table vs_root_table[] = {
1613 {
1614 .ctl_name = CTL_NET,
1615 .procname = "net",
1616 .mode = 0555,
1617 .child = ipvs_ipv4_table,
1618 },
1619 { .ctl_name = 0 }
1620}; 1599};
1600EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1621 1601
1622static struct ctl_table_header * sysctl_header; 1602static struct ctl_table_header * sysctl_header;
1623 1603
@@ -2344,7 +2324,7 @@ int ip_vs_control_init(void)
2344 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); 2324 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
2345 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); 2325 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
2346 2326
2347 sysctl_header = register_sysctl_table(vs_root_table); 2327 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
2348 2328
2349 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 2329 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2350 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2330 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 7d68b80c4c19..dfa0d713c801 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/interrupt.h> 20#include <linux/interrupt.h>
21#include <linux/sysctl.h>
21 22
22#include <net/ip_vs.h> 23#include <net/ip_vs.h>
23 24
@@ -146,9 +147,8 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
146 write_lock_bh(&est_lock); 147 write_lock_bh(&est_lock);
147 est->next = est_list; 148 est->next = est_list;
148 if (est->next == NULL) { 149 if (est->next == NULL) {
149 init_timer(&est_timer); 150 setup_timer(&est_timer, estimation_timer, 0);
150 est_timer.expires = jiffies + 2*HZ; 151 est_timer.expires = jiffies + 2*HZ;
151 est_timer.function = estimation_timer;
152 add_timer(&est_timer); 152 add_timer(&est_timer);
153 } 153 }
154 est_list = est; 154 est_list = est;
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 052f4ed59174..3888642706ad 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -114,7 +114,6 @@ struct ip_vs_lblc_table {
114 114
115static ctl_table vs_vars_table[] = { 115static ctl_table vs_vars_table[] = {
116 { 116 {
117 .ctl_name = NET_IPV4_VS_LBLC_EXPIRE,
118 .procname = "lblc_expiration", 117 .procname = "lblc_expiration",
119 .data = &sysctl_ip_vs_lblc_expiration, 118 .data = &sysctl_ip_vs_lblc_expiration,
120 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
@@ -124,36 +123,6 @@ static ctl_table vs_vars_table[] = {
124 { .ctl_name = 0 } 123 { .ctl_name = 0 }
125}; 124};
126 125
127static ctl_table vs_table[] = {
128 {
129 .ctl_name = NET_IPV4_VS,
130 .procname = "vs",
131 .mode = 0555,
132 .child = vs_vars_table
133 },
134 { .ctl_name = 0 }
135};
136
137static ctl_table ipvs_ipv4_table[] = {
138 {
139 .ctl_name = NET_IPV4,
140 .procname = "ipv4",
141 .mode = 0555,
142 .child = vs_table
143 },
144 { .ctl_name = 0 }
145};
146
147static ctl_table lblc_root_table[] = {
148 {
149 .ctl_name = CTL_NET,
150 .procname = "net",
151 .mode = 0555,
152 .child = ipvs_ipv4_table
153 },
154 { .ctl_name = 0 }
155};
156
157static struct ctl_table_header * sysctl_header; 126static struct ctl_table_header * sysctl_header;
158 127
159/* 128/*
@@ -393,9 +362,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
393 /* 362 /*
394 * Hook periodic timer for garbage collection 363 * Hook periodic timer for garbage collection
395 */ 364 */
396 init_timer(&tbl->periodic_timer); 365 setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
397 tbl->periodic_timer.data = (unsigned long)tbl; 366 (unsigned long)tbl);
398 tbl->periodic_timer.function = ip_vs_lblc_check_expire;
399 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; 367 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
400 add_timer(&tbl->periodic_timer); 368 add_timer(&tbl->periodic_timer);
401 369
@@ -582,9 +550,14 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
582 550
583static int __init ip_vs_lblc_init(void) 551static int __init ip_vs_lblc_init(void)
584{ 552{
553 int ret;
554
585 INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); 555 INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
586 sysctl_header = register_sysctl_table(lblc_root_table); 556 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
587 return register_ip_vs_scheduler(&ip_vs_lblc_scheduler); 557 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
558 if (ret)
559 unregister_sysctl_table(sysctl_header);
560 return ret;
588} 561}
589 562
590 563
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 6a1fec416eaf..daa260eb21cf 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -48,8 +48,6 @@
48/* for sysctl */ 48/* for sysctl */
49#include <linux/fs.h> 49#include <linux/fs.h>
50#include <linux/sysctl.h> 50#include <linux/sysctl.h>
51/* for proc_net_create/proc_net_remove */
52#include <linux/proc_fs.h>
53#include <net/net_namespace.h> 51#include <net/net_namespace.h>
54 52
55#include <net/ip_vs.h> 53#include <net/ip_vs.h>
@@ -304,7 +302,6 @@ struct ip_vs_lblcr_table {
304 302
305static ctl_table vs_vars_table[] = { 303static ctl_table vs_vars_table[] = {
306 { 304 {
307 .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE,
308 .procname = "lblcr_expiration", 305 .procname = "lblcr_expiration",
309 .data = &sysctl_ip_vs_lblcr_expiration, 306 .data = &sysctl_ip_vs_lblcr_expiration,
310 .maxlen = sizeof(int), 307 .maxlen = sizeof(int),
@@ -314,36 +311,6 @@ static ctl_table vs_vars_table[] = {
314 { .ctl_name = 0 } 311 { .ctl_name = 0 }
315}; 312};
316 313
317static ctl_table vs_table[] = {
318 {
319 .ctl_name = NET_IPV4_VS,
320 .procname = "vs",
321 .mode = 0555,
322 .child = vs_vars_table
323 },
324 { .ctl_name = 0 }
325};
326
327static ctl_table ipvs_ipv4_table[] = {
328 {
329 .ctl_name = NET_IPV4,
330 .procname = "ipv4",
331 .mode = 0555,
332 .child = vs_table
333 },
334 { .ctl_name = 0 }
335};
336
337static ctl_table lblcr_root_table[] = {
338 {
339 .ctl_name = CTL_NET,
340 .procname = "net",
341 .mode = 0555,
342 .child = ipvs_ipv4_table
343 },
344 { .ctl_name = 0 }
345};
346
347static struct ctl_table_header * sysctl_header; 314static struct ctl_table_header * sysctl_header;
348 315
349/* 316/*
@@ -547,71 +514,6 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
547 mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); 514 mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
548} 515}
549 516
550
551#ifdef CONFIG_IP_VS_LBLCR_DEBUG
552static struct ip_vs_lblcr_table *lblcr_table_list;
553
554/*
555 * /proc/net/ip_vs_lblcr to display the mappings of
556 * destination IP address <==> its serverSet
557 */
558static int
559ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
560{
561 off_t pos=0, begin;
562 int len=0, size;
563 struct ip_vs_lblcr_table *tbl;
564 unsigned long now = jiffies;
565 int i;
566 struct ip_vs_lblcr_entry *en;
567
568 tbl = lblcr_table_list;
569
570 size = sprintf(buffer, "LastTime Dest IP address Server set\n");
571 pos += size;
572 len += size;
573
574 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
575 read_lock_bh(&tbl->lock);
576 list_for_each_entry(en, &tbl->bucket[i], list) {
577 char tbuf[16];
578 struct ip_vs_dest_list *d;
579
580 sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
581 size = sprintf(buffer+len, "%8lu %-16s ",
582 now-en->lastuse, tbuf);
583
584 read_lock(&en->set.lock);
585 for (d=en->set.list; d!=NULL; d=d->next) {
586 size += sprintf(buffer+len+size,
587 "%u.%u.%u.%u ",
588 NIPQUAD(d->dest->addr));
589 }
590 read_unlock(&en->set.lock);
591 size += sprintf(buffer+len+size, "\n");
592 len += size;
593 pos += size;
594 if (pos <= offset)
595 len=0;
596 if (pos >= offset+length) {
597 read_unlock_bh(&tbl->lock);
598 goto done;
599 }
600 }
601 read_unlock_bh(&tbl->lock);
602 }
603
604 done:
605 begin = len - (pos - offset);
606 *start = buffer + begin;
607 len -= begin;
608 if(len>length)
609 len = length;
610 return len;
611}
612#endif
613
614
615static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) 517static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
616{ 518{
617 int i; 519 int i;
@@ -644,15 +546,11 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
644 /* 546 /*
645 * Hook periodic timer for garbage collection 547 * Hook periodic timer for garbage collection
646 */ 548 */
647 init_timer(&tbl->periodic_timer); 549 setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
648 tbl->periodic_timer.data = (unsigned long)tbl; 550 (unsigned long)tbl);
649 tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
650 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; 551 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
651 add_timer(&tbl->periodic_timer); 552 add_timer(&tbl->periodic_timer);
652 553
653#ifdef CONFIG_IP_VS_LBLCR_DEBUG
654 lblcr_table_list = tbl;
655#endif
656 return 0; 554 return 0;
657} 555}
658 556
@@ -841,20 +739,19 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
841 739
842static int __init ip_vs_lblcr_init(void) 740static int __init ip_vs_lblcr_init(void)
843{ 741{
742 int ret;
743
844 INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); 744 INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
845 sysctl_header = register_sysctl_table(lblcr_root_table); 745 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
846#ifdef CONFIG_IP_VS_LBLCR_DEBUG 746 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
847 proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); 747 if (ret)
848#endif 748 unregister_sysctl_table(sysctl_header);
849 return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 749 return ret;
850} 750}
851 751
852 752
853static void __exit ip_vs_lblcr_cleanup(void) 753static void __exit ip_vs_lblcr_cleanup(void)
854{ 754{
855#ifdef CONFIG_IP_VS_LBLCR_DEBUG
856 proc_net_remove(&init_net, "ip_vs_lblcr");
857#endif
858 unregister_sysctl_table(sysctl_header); 755 unregister_sysctl_table(sysctl_header);
859 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 756 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
860} 757}
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index e844ddb82b9a..dde28a250d92 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -45,7 +45,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
45/* 45/*
46 * register an ipvs protocol 46 * register an ipvs protocol
47 */ 47 */
48static int register_ip_vs_protocol(struct ip_vs_protocol *pp) 48static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp)
49{ 49{
50 unsigned hash = IP_VS_PROTO_HASH(pp->protocol); 50 unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
51 51
@@ -165,7 +165,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
165 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 165 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
166 if (ih == NULL) 166 if (ih == NULL)
167 sprintf(buf, "%s TRUNCATED", pp->name); 167 sprintf(buf, "%s TRUNCATED", pp->name);
168 else if (ih->frag_off & __constant_htons(IP_OFFSET)) 168 else if (ih->frag_off & htons(IP_OFFSET))
169 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", 169 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
170 pp->name, NIPQUAD(ih->saddr), 170 pp->name, NIPQUAD(ih->saddr),
171 NIPQUAD(ih->daddr)); 171 NIPQUAD(ih->daddr));
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index c36ccf057a19..aef0d3ee8e44 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -52,15 +52,15 @@ esp_conn_in_get(const struct sk_buff *skb,
52 if (likely(!inverse)) { 52 if (likely(!inverse)) {
53 cp = ip_vs_conn_in_get(IPPROTO_UDP, 53 cp = ip_vs_conn_in_get(IPPROTO_UDP,
54 iph->saddr, 54 iph->saddr,
55 __constant_htons(PORT_ISAKMP), 55 htons(PORT_ISAKMP),
56 iph->daddr, 56 iph->daddr,
57 __constant_htons(PORT_ISAKMP)); 57 htons(PORT_ISAKMP));
58 } else { 58 } else {
59 cp = ip_vs_conn_in_get(IPPROTO_UDP, 59 cp = ip_vs_conn_in_get(IPPROTO_UDP,
60 iph->daddr, 60 iph->daddr,
61 __constant_htons(PORT_ISAKMP), 61 htons(PORT_ISAKMP),
62 iph->saddr, 62 iph->saddr,
63 __constant_htons(PORT_ISAKMP)); 63 htons(PORT_ISAKMP));
64 } 64 }
65 65
66 if (!cp) { 66 if (!cp) {
@@ -89,15 +89,15 @@ esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
89 if (likely(!inverse)) { 89 if (likely(!inverse)) {
90 cp = ip_vs_conn_out_get(IPPROTO_UDP, 90 cp = ip_vs_conn_out_get(IPPROTO_UDP,
91 iph->saddr, 91 iph->saddr,
92 __constant_htons(PORT_ISAKMP), 92 htons(PORT_ISAKMP),
93 iph->daddr, 93 iph->daddr,
94 __constant_htons(PORT_ISAKMP)); 94 htons(PORT_ISAKMP));
95 } else { 95 } else {
96 cp = ip_vs_conn_out_get(IPPROTO_UDP, 96 cp = ip_vs_conn_out_get(IPPROTO_UDP,
97 iph->daddr, 97 iph->daddr,
98 __constant_htons(PORT_ISAKMP), 98 htons(PORT_ISAKMP),
99 iph->saddr, 99 iph->saddr,
100 __constant_htons(PORT_ISAKMP)); 100 htons(PORT_ISAKMP));
101 } 101 }
102 102
103 if (!cp) { 103 if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 1602304abbf9..121a32b1b756 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -24,6 +24,7 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <asm/string.h> 25#include <asm/string.h>
26#include <linux/kmod.h> 26#include <linux/kmod.h>
27#include <linux/sysctl.h>
27 28
28#include <net/ip_vs.h> 29#include <net/ip_vs.h>
29 30
@@ -183,19 +184,6 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
183 /* increase the module use count */ 184 /* increase the module use count */
184 ip_vs_use_count_inc(); 185 ip_vs_use_count_inc();
185 186
186 /*
187 * Make sure that the scheduler with this name doesn't exist
188 * in the scheduler list.
189 */
190 sched = ip_vs_sched_getbyname(scheduler->name);
191 if (sched) {
192 ip_vs_scheduler_put(sched);
193 ip_vs_use_count_dec();
194 IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
195 "already existed in the system\n", scheduler->name);
196 return -EINVAL;
197 }
198
199 write_lock_bh(&__ip_vs_sched_lock); 187 write_lock_bh(&__ip_vs_sched_lock);
200 188
201 if (scheduler->n_list.next != &scheduler->n_list) { 189 if (scheduler->n_list.next != &scheduler->n_list) {
@@ -207,6 +195,20 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
207 } 195 }
208 196
209 /* 197 /*
198 * Make sure that the scheduler with this name doesn't exist
199 * in the scheduler list.
200 */
201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
202 if (strcmp(scheduler->name, sched->name) == 0) {
203 write_unlock_bh(&__ip_vs_sched_lock);
204 ip_vs_use_count_dec();
205 IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
206 "already existed in the system\n",
207 scheduler->name);
208 return -EINVAL;
209 }
210 }
211 /*
210 * Add it into the d-linked scheduler list 212 * Add it into the d-linked scheduler list
211 */ 213 */
212 list_add(&scheduler->n_list, &ip_vs_schedulers); 214 list_add(&scheduler->n_list, &ip_vs_schedulers);
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index c99f2a33fb9e..948378d0a755 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -72,7 +72,6 @@ struct ip_vs_sync_thread_data {
72 int state; 72 int state;
73}; 73};
74 74
75#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
76#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 75#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
77#define FULL_CONN_SIZE \ 76#define FULL_CONN_SIZE \
78(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 77(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
@@ -284,6 +283,8 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
284 struct ip_vs_sync_conn *s; 283 struct ip_vs_sync_conn *s;
285 struct ip_vs_sync_conn_options *opt; 284 struct ip_vs_sync_conn_options *opt;
286 struct ip_vs_conn *cp; 285 struct ip_vs_conn *cp;
286 struct ip_vs_protocol *pp;
287 struct ip_vs_dest *dest;
287 char *p; 288 char *p;
288 int i; 289 int i;
289 290
@@ -304,10 +305,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
304 305
305 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 306 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
306 for (i=0; i<m->nr_conns; i++) { 307 for (i=0; i<m->nr_conns; i++) {
307 unsigned flags; 308 unsigned flags, state;
308 309
309 s = (struct ip_vs_sync_conn *)p; 310 s = (struct ip_vs_sync_conn *)p;
310 flags = ntohs(s->flags); 311 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
312 state = ntohs(s->state);
311 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 313 if (!(flags & IP_VS_CONN_F_TEMPLATE))
312 cp = ip_vs_conn_in_get(s->protocol, 314 cp = ip_vs_conn_in_get(s->protocol,
313 s->caddr, s->cport, 315 s->caddr, s->cport,
@@ -317,22 +319,57 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
317 s->caddr, s->cport, 319 s->caddr, s->cport,
318 s->vaddr, s->vport); 320 s->vaddr, s->vport);
319 if (!cp) { 321 if (!cp) {
322 /*
323 * Find the appropriate destination for the connection.
324 * If it is not found the connection will remain unbound
325 * but still handled.
326 */
327 dest = ip_vs_find_dest(s->daddr, s->dport,
328 s->vaddr, s->vport,
329 s->protocol);
330 /* Set the approprite ativity flag */
331 if (s->protocol == IPPROTO_TCP) {
332 if (state != IP_VS_TCP_S_ESTABLISHED)
333 flags |= IP_VS_CONN_F_INACTIVE;
334 else
335 flags &= ~IP_VS_CONN_F_INACTIVE;
336 }
320 cp = ip_vs_conn_new(s->protocol, 337 cp = ip_vs_conn_new(s->protocol,
321 s->caddr, s->cport, 338 s->caddr, s->cport,
322 s->vaddr, s->vport, 339 s->vaddr, s->vport,
323 s->daddr, s->dport, 340 s->daddr, s->dport,
324 flags, NULL); 341 flags, dest);
342 if (dest)
343 atomic_dec(&dest->refcnt);
325 if (!cp) { 344 if (!cp) {
326 IP_VS_ERR("ip_vs_conn_new failed\n"); 345 IP_VS_ERR("ip_vs_conn_new failed\n");
327 return; 346 return;
328 } 347 }
329 cp->state = ntohs(s->state); 348 cp->state = state;
330 } else if (!cp->dest) { 349 } else if (!cp->dest) {
331 /* it is an entry created by the synchronization */ 350 dest = ip_vs_try_bind_dest(cp);
332 cp->state = ntohs(s->state); 351 if (!dest) {
333 cp->flags = flags | IP_VS_CONN_F_HASHED; 352 /* it is an unbound entry created by
334 } /* Note that we don't touch its state and flags 353 * synchronization */
335 if it is a normal entry. */ 354 cp->flags = flags | IP_VS_CONN_F_HASHED;
355 } else
356 atomic_dec(&dest->refcnt);
357 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
358 (cp->state != state)) {
359 /* update active/inactive flag for the connection */
360 dest = cp->dest;
361 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
362 (state != IP_VS_TCP_S_ESTABLISHED)) {
363 atomic_dec(&dest->activeconns);
364 atomic_inc(&dest->inactconns);
365 cp->flags |= IP_VS_CONN_F_INACTIVE;
366 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
367 (state == IP_VS_TCP_S_ESTABLISHED)) {
368 atomic_inc(&dest->activeconns);
369 atomic_dec(&dest->inactconns);
370 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
371 }
372 }
336 373
337 if (flags & IP_VS_CONN_F_SEQ_MASK) { 374 if (flags & IP_VS_CONN_F_SEQ_MASK) {
338 opt = (struct ip_vs_sync_conn_options *)&s[1]; 375 opt = (struct ip_vs_sync_conn_options *)&s[1];
@@ -342,7 +379,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
342 p += SIMPLE_CONN_SIZE; 379 p += SIMPLE_CONN_SIZE;
343 380
344 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 381 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
345 cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; 382 cp->state = state;
383 pp = ip_vs_proto_get(s->protocol);
384 cp->timeout = pp->timeout_table[cp->state];
346 ip_vs_conn_put(cp); 385 ip_vs_conn_put(cp);
347 386
348 if (p > buffer+buflen) { 387 if (p > buffer+buflen) {
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 7c074e386c17..f63006caea03 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -16,8 +16,8 @@
16 */ 16 */
17 17
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/ip.h>
20#include <linux/tcp.h> /* for tcphdr */ 19#include <linux/tcp.h> /* for tcphdr */
20#include <net/ip.h>
21#include <net/tcp.h> /* for csum_tcpudp_magic */ 21#include <net/tcp.h> /* for csum_tcpudp_magic */
22#include <net/udp.h> 22#include <net/udp.h>
23#include <net/icmp.h> /* for icmp_send */ 23#include <net/icmp.h> /* for icmp_send */
@@ -59,7 +59,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
59 return dst; 59 return dst;
60} 60}
61 61
62static inline struct rtable * 62static struct rtable *
63__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) 63__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
64{ 64{
65 struct rtable *rt; /* Route to the other host */ 65 struct rtable *rt; /* Route to the other host */
@@ -78,7 +78,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
78 .tos = rtos, } }, 78 .tos = rtos, } },
79 }; 79 };
80 80
81 if (ip_route_output_key(&rt, &fl)) { 81 if (ip_route_output_key(&init_net, &rt, &fl)) {
82 spin_unlock(&dest->dst_lock); 82 spin_unlock(&dest->dst_lock);
83 IP_VS_DBG_RL("ip_route_output error, " 83 IP_VS_DBG_RL("ip_route_output error, "
84 "dest: %u.%u.%u.%u\n", 84 "dest: %u.%u.%u.%u\n",
@@ -101,7 +101,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
101 .tos = rtos, } }, 101 .tos = rtos, } },
102 }; 102 };
103 103
104 if (ip_route_output_key(&rt, &fl)) { 104 if (ip_route_output_key(&init_net, &rt, &fl)) {
105 IP_VS_DBG_RL("ip_route_output error, dest: " 105 IP_VS_DBG_RL("ip_route_output error, dest: "
106 "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); 106 "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
107 return NULL; 107 return NULL;
@@ -129,7 +129,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
129do { \ 129do { \
130 (skb)->ipvs_property = 1; \ 130 (skb)->ipvs_property = 1; \
131 skb_forward_csum(skb); \ 131 skb_forward_csum(skb); \
132 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ 132 NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
133 (rt)->u.dst.dev, dst_output); \ 133 (rt)->u.dst.dev, dst_output); \
134} while (0) 134} while (0)
135 135
@@ -170,7 +170,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
170 170
171 EnterFunction(10); 171 EnterFunction(10);
172 172
173 if (ip_route_output_key(&rt, &fl)) { 173 if (ip_route_output_key(&init_net, &rt, &fl)) {
174 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " 174 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
175 "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); 175 "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
176 goto tx_error_icmp; 176 goto tx_error_icmp;
@@ -406,14 +406,12 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
406 iph->daddr = rt->rt_dst; 406 iph->daddr = rt->rt_dst;
407 iph->saddr = rt->rt_src; 407 iph->saddr = rt->rt_src;
408 iph->ttl = old_iph->ttl; 408 iph->ttl = old_iph->ttl;
409 iph->tot_len = htons(skb->len);
410 ip_select_ident(iph, &rt->u.dst, NULL); 409 ip_select_ident(iph, &rt->u.dst, NULL);
411 ip_send_check(iph);
412 410
413 /* Another hack: avoid icmp_send in ip_fragment */ 411 /* Another hack: avoid icmp_send in ip_fragment */
414 skb->local_df = 1; 412 skb->local_df = 1;
415 413
416 IP_VS_XMIT(skb, rt); 414 ip_local_out(skb);
417 415
418 LeaveFunction(10); 416 LeaveFunction(10);
419 417
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5539debf4973..9a904c6c0dc8 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -7,6 +7,7 @@
7#include <net/route.h> 7#include <net/route.h>
8#include <net/xfrm.h> 8#include <net/xfrm.h>
9#include <net/ip.h> 9#include <net/ip.h>
10#include <net/netfilter/nf_queue.h>
10 11
11/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 12/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
12int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) 13int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
@@ -18,12 +19,12 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
18 unsigned int hh_len; 19 unsigned int hh_len;
19 unsigned int type; 20 unsigned int type;
20 21
21 type = inet_addr_type(iph->saddr); 22 type = inet_addr_type(&init_net, iph->saddr);
22 if (addr_type == RTN_UNSPEC) 23 if (addr_type == RTN_UNSPEC)
23 addr_type = type; 24 addr_type = type;
24 25
25 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 26 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
26 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. 27 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
27 */ 28 */
28 if (addr_type == RTN_LOCAL) { 29 if (addr_type == RTN_LOCAL) {
29 fl.nl_u.ip4_u.daddr = iph->daddr; 30 fl.nl_u.ip4_u.daddr = iph->daddr;
@@ -32,7 +33,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
32 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); 33 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
33 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 34 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
34 fl.mark = skb->mark; 35 fl.mark = skb->mark;
35 if (ip_route_output_key(&rt, &fl) != 0) 36 if (ip_route_output_key(&init_net, &rt, &fl) != 0)
36 return -1; 37 return -1;
37 38
38 /* Drop old route. */ 39 /* Drop old route. */
@@ -42,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
42 /* non-local src, find valid iif to satisfy 43 /* non-local src, find valid iif to satisfy
43 * rp-filter when calling ip_route_input. */ 44 * rp-filter when calling ip_route_input. */
44 fl.nl_u.ip4_u.daddr = iph->saddr; 45 fl.nl_u.ip4_u.daddr = iph->saddr;
45 if (ip_route_output_key(&rt, &fl) != 0) 46 if (ip_route_output_key(&init_net, &rt, &fl) != 0)
46 return -1; 47 return -1;
47 48
48 odst = skb->dst; 49 odst = skb->dst;
@@ -122,11 +123,12 @@ struct ip_rt_info {
122 u_int8_t tos; 123 u_int8_t tos;
123}; 124};
124 125
125static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) 126static void nf_ip_saveroute(const struct sk_buff *skb,
127 struct nf_queue_entry *entry)
126{ 128{
127 struct ip_rt_info *rt_info = nf_info_reroute(info); 129 struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
128 130
129 if (info->hook == NF_IP_LOCAL_OUT) { 131 if (entry->hook == NF_INET_LOCAL_OUT) {
130 const struct iphdr *iph = ip_hdr(skb); 132 const struct iphdr *iph = ip_hdr(skb);
131 133
132 rt_info->tos = iph->tos; 134 rt_info->tos = iph->tos;
@@ -135,11 +137,12 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
135 } 137 }
136} 138}
137 139
138static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) 140static int nf_ip_reroute(struct sk_buff *skb,
141 const struct nf_queue_entry *entry)
139{ 142{
140 const struct ip_rt_info *rt_info = nf_info_reroute(info); 143 const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
141 144
142 if (info->hook == NF_IP_LOCAL_OUT) { 145 if (entry->hook == NF_INET_LOCAL_OUT) {
143 const struct iphdr *iph = ip_hdr(skb); 146 const struct iphdr *iph = ip_hdr(skb);
144 147
145 if (!(iph->tos == rt_info->tos 148 if (!(iph->tos == rt_info->tos
@@ -158,7 +161,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
158 161
159 switch (skb->ip_summed) { 162 switch (skb->ip_summed) {
160 case CHECKSUM_COMPLETE: 163 case CHECKSUM_COMPLETE:
161 if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) 164 if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
162 break; 165 break;
163 if ((protocol == 0 && !csum_fold(skb->csum)) || 166 if ((protocol == 0 && !csum_fold(skb->csum)) ||
164 !csum_tcpudp_magic(iph->saddr, iph->daddr, 167 !csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -182,9 +185,15 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
182 185
183EXPORT_SYMBOL(nf_ip_checksum); 186EXPORT_SYMBOL(nf_ip_checksum);
184 187
185static struct nf_afinfo nf_ip_afinfo = { 188static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
189{
190 return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
191}
192
193static const struct nf_afinfo nf_ip_afinfo = {
186 .family = AF_INET, 194 .family = AF_INET,
187 .checksum = nf_ip_checksum, 195 .checksum = nf_ip_checksum,
196 .route = nf_ip_route,
188 .saveroute = nf_ip_saveroute, 197 .saveroute = nf_ip_saveroute,
189 .reroute = nf_ip_reroute, 198 .reroute = nf_ip_reroute,
190 .route_key_size = sizeof(struct ip_rt_info), 199 .route_key_size = sizeof(struct ip_rt_info),
@@ -202,3 +211,13 @@ static void ipv4_netfilter_fini(void)
202 211
203module_init(ipv4_netfilter_init); 212module_init(ipv4_netfilter_init);
204module_exit(ipv4_netfilter_fini); 213module_exit(ipv4_netfilter_fini);
214
215#ifdef CONFIG_SYSCTL
216struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
217 { .procname = "net", .ctl_name = CTL_NET, },
218 { .procname = "ipv4", .ctl_name = NET_IPV4, },
219 { .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, },
220 { }
221};
222EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
223#endif /* CONFIG_SYSCTL */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fa97947c6ae1..9a077cb24798 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
8config NF_CONNTRACK_IPV4 8config NF_CONNTRACK_IPV4
9 tristate "IPv4 connection tracking support (required for NAT)" 9 tristate "IPv4 connection tracking support (required for NAT)"
10 depends on NF_CONNTRACK 10 depends on NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n
11 ---help--- 12 ---help---
12 Connection tracking keeps a record of what packets have passed 13 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related 14 through your machine, in order to figure out how they are related
@@ -32,6 +33,7 @@ config NF_CONNTRACK_PROC_COMPAT
32 33
33config IP_NF_QUEUE 34config IP_NF_QUEUE
34 tristate "IP Userspace queueing via NETLINK (OBSOLETE)" 35 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
36 depends on NETFILTER_ADVANCED
35 help 37 help
36 Netfilter has the ability to queue packets to user space: the 38 Netfilter has the ability to queue packets to user space: the
37 netlink device can be used to access them using this driver. 39 netlink device can be used to access them using this driver.
@@ -44,6 +46,7 @@ config IP_NF_QUEUE
44 46
45config IP_NF_IPTABLES 47config IP_NF_IPTABLES
46 tristate "IP tables support (required for filtering/masq/NAT)" 48 tristate "IP tables support (required for filtering/masq/NAT)"
49 default m if NETFILTER_ADVANCED=n
47 select NETFILTER_XTABLES 50 select NETFILTER_XTABLES
48 help 51 help
49 iptables is a general, extensible packet identification framework. 52 iptables is a general, extensible packet identification framework.
@@ -54,27 +57,10 @@ config IP_NF_IPTABLES
54 To compile it as a module, choose M here. If unsure, say N. 57 To compile it as a module, choose M here. If unsure, say N.
55 58
56# The matches. 59# The matches.
57config IP_NF_MATCH_IPRANGE
58 tristate "IP range match support"
59 depends on IP_NF_IPTABLES
60 help
61 This option makes possible to match IP addresses against IP address
62 ranges.
63
64 To compile it as a module, choose M here. If unsure, say N.
65
66config IP_NF_MATCH_TOS
67 tristate "TOS match support"
68 depends on IP_NF_IPTABLES
69 help
70 TOS matching allows you to match packets based on the Type Of
71 Service fields of the IP packet.
72
73 To compile it as a module, choose M here. If unsure, say N.
74
75config IP_NF_MATCH_RECENT 60config IP_NF_MATCH_RECENT
76 tristate "recent match support" 61 tristate '"recent" match support'
77 depends on IP_NF_IPTABLES 62 depends on IP_NF_IPTABLES
63 depends on NETFILTER_ADVANCED
78 help 64 help
79 This match is used for creating one or many lists of recently 65 This match is used for creating one or many lists of recently
80 used addresses and then matching against that/those list(s). 66 used addresses and then matching against that/those list(s).
@@ -85,8 +71,9 @@ config IP_NF_MATCH_RECENT
85 To compile it as a module, choose M here. If unsure, say N. 71 To compile it as a module, choose M here. If unsure, say N.
86 72
87config IP_NF_MATCH_ECN 73config IP_NF_MATCH_ECN
88 tristate "ECN match support" 74 tristate '"ecn" match support'
89 depends on IP_NF_IPTABLES 75 depends on IP_NF_IPTABLES
76 depends on NETFILTER_ADVANCED
90 help 77 help
91 This option adds a `ECN' match, which allows you to match against 78 This option adds a `ECN' match, which allows you to match against
92 the IPv4 and TCP header ECN fields. 79 the IPv4 and TCP header ECN fields.
@@ -94,8 +81,9 @@ config IP_NF_MATCH_ECN
94 To compile it as a module, choose M here. If unsure, say N. 81 To compile it as a module, choose M here. If unsure, say N.
95 82
96config IP_NF_MATCH_AH 83config IP_NF_MATCH_AH
97 tristate "AH match support" 84 tristate '"ah" match support'
98 depends on IP_NF_IPTABLES 85 depends on IP_NF_IPTABLES
86 depends on NETFILTER_ADVANCED
99 help 87 help
100 This match extension allows you to match a range of SPIs 88 This match extension allows you to match a range of SPIs
101 inside AH header of IPSec packets. 89 inside AH header of IPSec packets.
@@ -103,37 +91,31 @@ config IP_NF_MATCH_AH
103 To compile it as a module, choose M here. If unsure, say N. 91 To compile it as a module, choose M here. If unsure, say N.
104 92
105config IP_NF_MATCH_TTL 93config IP_NF_MATCH_TTL
106 tristate "TTL match support" 94 tristate '"ttl" match support'
107 depends on IP_NF_IPTABLES 95 depends on IP_NF_IPTABLES
96 depends on NETFILTER_ADVANCED
108 help 97 help
109 This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user 98 This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
110 to match packets by their TTL value. 99 to match packets by their TTL value.
111 100
112 To compile it as a module, choose M here. If unsure, say N. 101 To compile it as a module, choose M here. If unsure, say N.
113 102
114config IP_NF_MATCH_OWNER
115 tristate "Owner match support"
116 depends on IP_NF_IPTABLES
117 help
118 Packet owner matching allows you to match locally-generated packets
119 based on who created them: the user, group, process or session.
120
121 To compile it as a module, choose M here. If unsure, say N.
122
123config IP_NF_MATCH_ADDRTYPE 103config IP_NF_MATCH_ADDRTYPE
124 tristate 'address type match support' 104 tristate '"addrtype" address type match support'
125 depends on IP_NF_IPTABLES 105 depends on IP_NF_IPTABLES
106 depends on NETFILTER_ADVANCED
126 help 107 help
127 This option allows you to match what routing thinks of an address, 108 This option allows you to match what routing thinks of an address,
128 eg. UNICAST, LOCAL, BROADCAST, ... 109 eg. UNICAST, LOCAL, BROADCAST, ...
129 110
130 If you want to compile it as a module, say M here and read 111 If you want to compile it as a module, say M here and read
131 <file:Documentation/modules.txt>. If unsure, say `N'. 112 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
132 113
133# `filter', generic and specific targets 114# `filter', generic and specific targets
134config IP_NF_FILTER 115config IP_NF_FILTER
135 tristate "Packet filtering" 116 tristate "Packet filtering"
136 depends on IP_NF_IPTABLES 117 depends on IP_NF_IPTABLES
118 default m if NETFILTER_ADVANCED=n
137 help 119 help
138 Packet filtering defines a table `filter', which has a series of 120 Packet filtering defines a table `filter', which has a series of
139 rules for simple packet filtering at local input, forwarding and 121 rules for simple packet filtering at local input, forwarding and
@@ -144,6 +126,7 @@ config IP_NF_FILTER
144config IP_NF_TARGET_REJECT 126config IP_NF_TARGET_REJECT
145 tristate "REJECT target support" 127 tristate "REJECT target support"
146 depends on IP_NF_FILTER 128 depends on IP_NF_FILTER
129 default m if NETFILTER_ADVANCED=n
147 help 130 help
148 The REJECT target allows a filtering rule to specify that an ICMP 131 The REJECT target allows a filtering rule to specify that an ICMP
149 error should be issued in response to an incoming packet, rather 132 error should be issued in response to an incoming packet, rather
@@ -154,6 +137,7 @@ config IP_NF_TARGET_REJECT
154config IP_NF_TARGET_LOG 137config IP_NF_TARGET_LOG
155 tristate "LOG target support" 138 tristate "LOG target support"
156 depends on IP_NF_IPTABLES 139 depends on IP_NF_IPTABLES
140 default m if NETFILTER_ADVANCED=n
157 help 141 help
158 This option adds a `LOG' target, which allows you to create rules in 142 This option adds a `LOG' target, which allows you to create rules in
159 any iptables table which records the packet header to the syslog. 143 any iptables table which records the packet header to the syslog.
@@ -163,6 +147,7 @@ config IP_NF_TARGET_LOG
163config IP_NF_TARGET_ULOG 147config IP_NF_TARGET_ULOG
164 tristate "ULOG target support" 148 tristate "ULOG target support"
165 depends on IP_NF_IPTABLES 149 depends on IP_NF_IPTABLES
150 default m if NETFILTER_ADVANCED=n
166 ---help--- 151 ---help---
167 152
168 This option enables the old IPv4-only "ipt_ULOG" implementation 153 This option enables the old IPv4-only "ipt_ULOG" implementation
@@ -183,6 +168,7 @@ config IP_NF_TARGET_ULOG
183config NF_NAT 168config NF_NAT
184 tristate "Full NAT" 169 tristate "Full NAT"
185 depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 170 depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4
171 default m if NETFILTER_ADVANCED=n
186 help 172 help
187 The Full NAT option allows masquerading, port forwarding and other 173 The Full NAT option allows masquerading, port forwarding and other
188 forms of full Network Address Port Translation. It is controlled by 174 forms of full Network Address Port Translation. It is controlled by
@@ -198,6 +184,7 @@ config NF_NAT_NEEDED
198config IP_NF_TARGET_MASQUERADE 184config IP_NF_TARGET_MASQUERADE
199 tristate "MASQUERADE target support" 185 tristate "MASQUERADE target support"
200 depends on NF_NAT 186 depends on NF_NAT
187 default m if NETFILTER_ADVANCED=n
201 help 188 help
202 Masquerading is a special case of NAT: all outgoing connections are 189 Masquerading is a special case of NAT: all outgoing connections are
203 changed to seem to come from a particular interface's address, and 190 changed to seem to come from a particular interface's address, and
@@ -210,6 +197,7 @@ config IP_NF_TARGET_MASQUERADE
210config IP_NF_TARGET_REDIRECT 197config IP_NF_TARGET_REDIRECT
211 tristate "REDIRECT target support" 198 tristate "REDIRECT target support"
212 depends on NF_NAT 199 depends on NF_NAT
200 depends on NETFILTER_ADVANCED
213 help 201 help
214 REDIRECT is a special case of NAT: all incoming connections are 202 REDIRECT is a special case of NAT: all incoming connections are
215 mapped onto the incoming interface's address, causing the packets to 203 mapped onto the incoming interface's address, causing the packets to
@@ -221,6 +209,7 @@ config IP_NF_TARGET_REDIRECT
221config IP_NF_TARGET_NETMAP 209config IP_NF_TARGET_NETMAP
222 tristate "NETMAP target support" 210 tristate "NETMAP target support"
223 depends on NF_NAT 211 depends on NF_NAT
212 depends on NETFILTER_ADVANCED
224 help 213 help
225 NETMAP is an implementation of static 1:1 NAT mapping of network 214 NETMAP is an implementation of static 1:1 NAT mapping of network
226 addresses. It maps the network address part, while keeping the host 215 addresses. It maps the network address part, while keeping the host
@@ -229,18 +218,10 @@ config IP_NF_TARGET_NETMAP
229 218
230 To compile it as a module, choose M here. If unsure, say N. 219 To compile it as a module, choose M here. If unsure, say N.
231 220
232config IP_NF_TARGET_SAME
233 tristate "SAME target support (OBSOLETE)"
234 depends on NF_NAT
235 help
236 This option adds a `SAME' target, which works like the standard SNAT
237 target, but attempts to give clients the same IP for all connections.
238
239 To compile it as a module, choose M here. If unsure, say N.
240
241config NF_NAT_SNMP_BASIC 221config NF_NAT_SNMP_BASIC
242 tristate "Basic SNMP-ALG support (EXPERIMENTAL)" 222 tristate "Basic SNMP-ALG support"
243 depends on EXPERIMENTAL && NF_NAT 223 depends on NF_NAT
224 depends on NETFILTER_ADVANCED
244 ---help--- 225 ---help---
245 226
246 This module implements an Application Layer Gateway (ALG) for 227 This module implements an Application Layer Gateway (ALG) for
@@ -304,6 +285,7 @@ config NF_NAT_SIP
304config IP_NF_MANGLE 285config IP_NF_MANGLE
305 tristate "Packet mangling" 286 tristate "Packet mangling"
306 depends on IP_NF_IPTABLES 287 depends on IP_NF_IPTABLES
288 default m if NETFILTER_ADVANCED=n
307 help 289 help
308 This option adds a `mangle' table to iptables: see the man page for 290 This option adds a `mangle' table to iptables: see the man page for
309 iptables(8). This table is used for various packet alterations 291 iptables(8). This table is used for various packet alterations
@@ -311,19 +293,10 @@ config IP_NF_MANGLE
311 293
312 To compile it as a module, choose M here. If unsure, say N. 294 To compile it as a module, choose M here. If unsure, say N.
313 295
314config IP_NF_TARGET_TOS
315 tristate "TOS target support"
316 depends on IP_NF_MANGLE
317 help
318 This option adds a `TOS' target, which allows you to create rules in
319 the `mangle' table which alter the Type Of Service field of an IP
320 packet prior to routing.
321
322 To compile it as a module, choose M here. If unsure, say N.
323
324config IP_NF_TARGET_ECN 296config IP_NF_TARGET_ECN
325 tristate "ECN target support" 297 tristate "ECN target support"
326 depends on IP_NF_MANGLE 298 depends on IP_NF_MANGLE
299 depends on NETFILTER_ADVANCED
327 ---help--- 300 ---help---
328 This option adds a `ECN' target, which can be used in the iptables mangle 301 This option adds a `ECN' target, which can be used in the iptables mangle
329 table. 302 table.
@@ -338,6 +311,7 @@ config IP_NF_TARGET_ECN
338config IP_NF_TARGET_TTL 311config IP_NF_TARGET_TTL
339 tristate 'TTL target support' 312 tristate 'TTL target support'
340 depends on IP_NF_MANGLE 313 depends on IP_NF_MANGLE
314 depends on NETFILTER_ADVANCED
341 help 315 help
342 This option adds a `TTL' target, which enables the user to modify 316 This option adds a `TTL' target, which enables the user to modify
343 the TTL value of the IP header. 317 the TTL value of the IP header.
@@ -353,6 +327,7 @@ config IP_NF_TARGET_CLUSTERIP
353 tristate "CLUSTERIP target support (EXPERIMENTAL)" 327 tristate "CLUSTERIP target support (EXPERIMENTAL)"
354 depends on IP_NF_MANGLE && EXPERIMENTAL 328 depends on IP_NF_MANGLE && EXPERIMENTAL
355 depends on NF_CONNTRACK_IPV4 329 depends on NF_CONNTRACK_IPV4
330 depends on NETFILTER_ADVANCED
356 select NF_CONNTRACK_MARK 331 select NF_CONNTRACK_MARK
357 help 332 help
358 The CLUSTERIP target allows you to build load-balancing clusters of 333 The CLUSTERIP target allows you to build load-balancing clusters of
@@ -365,18 +340,20 @@ config IP_NF_TARGET_CLUSTERIP
365config IP_NF_RAW 340config IP_NF_RAW
366 tristate 'raw table support (required for NOTRACK/TRACE)' 341 tristate 'raw table support (required for NOTRACK/TRACE)'
367 depends on IP_NF_IPTABLES 342 depends on IP_NF_IPTABLES
343 depends on NETFILTER_ADVANCED
368 help 344 help
369 This option adds a `raw' table to iptables. This table is the very 345 This option adds a `raw' table to iptables. This table is the very
370 first in the netfilter framework and hooks in at the PREROUTING 346 first in the netfilter framework and hooks in at the PREROUTING
371 and OUTPUT chains. 347 and OUTPUT chains.
372 348
373 If you want to compile it as a module, say M here and read 349 If you want to compile it as a module, say M here and read
374 <file:Documentation/modules.txt>. If unsure, say `N'. 350 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
375 351
376# ARP tables 352# ARP tables
377config IP_NF_ARPTABLES 353config IP_NF_ARPTABLES
378 tristate "ARP tables support" 354 tristate "ARP tables support"
379 select NETFILTER_XTABLES 355 select NETFILTER_XTABLES
356 depends on NETFILTER_ADVANCED
380 help 357 help
381 arptables is a general, extensible packet identification framework. 358 arptables is a general, extensible packet identification framework.
382 The ARP packet filtering and mangling (manipulation)subsystems 359 The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 409d273f6f82..0c7dc78a62e9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -41,27 +41,22 @@ obj-$(CONFIG_NF_NAT) += iptable_nat.o
41obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 41obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
42 42
43# matches 43# matches
44obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o 44obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
45obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
46obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
47obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
48obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
49obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 45obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
46obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
47obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
50obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o 48obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
51obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
52 49
53# targets 50# targets
54obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 51obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
55obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
56obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 52obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
53obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
57obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o 54obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
58obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
59obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 55obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
60obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o 56obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
61obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o 57obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
62obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
63obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
64obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o 58obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
59obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
65 60
66# generic ARP tables 61# generic ARP tables
67obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o 62obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2909c92ecd99..a7591ce344d2 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -19,9 +19,11 @@
19#include <linux/proc_fs.h> 19#include <linux/proc_fs.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22
23#include <asm/uaccess.h>
24#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/err.h>
24#include <net/compat.h>
25#include <net/sock.h>
26#include <asm/uaccess.h>
25 27
26#include <linux/netfilter/x_tables.h> 28#include <linux/netfilter/x_tables.h>
27#include <linux/netfilter_arp/arp_tables.h> 29#include <linux/netfilter_arp/arp_tables.h>
@@ -83,7 +85,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
83 __be32 src_ipaddr, tgt_ipaddr; 85 __be32 src_ipaddr, tgt_ipaddr;
84 int i, ret; 86 int i, ret;
85 87
86#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) 88#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
87 89
88 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, 90 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
89 ARPT_INV_ARPOP)) { 91 ARPT_INV_ARPOP)) {
@@ -179,6 +181,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
179 } 181 }
180 182
181 return 1; 183 return 1;
184#undef FWINV
182} 185}
183 186
184static inline int arp_checkentry(const struct arpt_arp *arp) 187static inline int arp_checkentry(const struct arpt_arp *arp)
@@ -435,29 +438,9 @@ static int mark_source_chains(struct xt_table_info *newinfo,
435 return 1; 438 return 1;
436} 439}
437 440
438static inline int standard_check(const struct arpt_entry_target *t, 441static inline int check_entry(struct arpt_entry *e, const char *name)
439 unsigned int max_offset)
440{
441 /* Check standard info. */
442 if (t->u.target_size
443 != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
444 duprintf("arpt_standard_check: target size %u != %Zu\n",
445 t->u.target_size,
446 ARPT_ALIGN(sizeof(struct arpt_standard_target)));
447 return 0;
448 }
449
450 return 1;
451}
452
453static struct arpt_target arpt_standard_target;
454
455static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
456 unsigned int *i)
457{ 442{
458 struct arpt_entry_target *t; 443 struct arpt_entry_target *t;
459 struct arpt_target *target;
460 int ret;
461 444
462 if (!arp_checkentry(&e->arp)) { 445 if (!arp_checkentry(&e->arp)) {
463 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 446 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
@@ -471,35 +454,57 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
471 if (e->target_offset + t->u.target_size > e->next_offset) 454 if (e->target_offset + t->u.target_size > e->next_offset)
472 return -EINVAL; 455 return -EINVAL;
473 456
457 return 0;
458}
459
460static inline int check_target(struct arpt_entry *e, const char *name)
461{
462 struct arpt_entry_target *t;
463 struct arpt_target *target;
464 int ret;
465
466 t = arpt_get_target(e);
467 target = t->u.kernel.target;
468
469 ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t),
470 name, e->comefrom, 0, 0);
471 if (!ret && t->u.kernel.target->checkentry
472 && !t->u.kernel.target->checkentry(name, e, target, t->data,
473 e->comefrom)) {
474 duprintf("arp_tables: check failed for `%s'.\n",
475 t->u.kernel.target->name);
476 ret = -EINVAL;
477 }
478 return ret;
479}
480
481static inline int
482find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
483 unsigned int *i)
484{
485 struct arpt_entry_target *t;
486 struct arpt_target *target;
487 int ret;
488
489 ret = check_entry(e, name);
490 if (ret)
491 return ret;
492
493 t = arpt_get_target(e);
474 target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, 494 target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
475 t->u.user.revision), 495 t->u.user.revision),
476 "arpt_%s", t->u.user.name); 496 "arpt_%s", t->u.user.name);
477 if (IS_ERR(target) || !target) { 497 if (IS_ERR(target) || !target) {
478 duprintf("check_entry: `%s' not found\n", t->u.user.name); 498 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
479 ret = target ? PTR_ERR(target) : -ENOENT; 499 ret = target ? PTR_ERR(target) : -ENOENT;
480 goto out; 500 goto out;
481 } 501 }
482 t->u.kernel.target = target; 502 t->u.kernel.target = target;
483 503
484 ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), 504 ret = check_target(e, name);
485 name, e->comefrom, 0, 0);
486 if (ret) 505 if (ret)
487 goto err; 506 goto err;
488 507
489 if (t->u.kernel.target == &arpt_standard_target) {
490 if (!standard_check(t, size)) {
491 ret = -EINVAL;
492 goto err;
493 }
494 } else if (t->u.kernel.target->checkentry
495 && !t->u.kernel.target->checkentry(name, e, target, t->data,
496 e->comefrom)) {
497 duprintf("arp_tables: check failed for `%s'.\n",
498 t->u.kernel.target->name);
499 ret = -EINVAL;
500 goto err;
501 }
502
503 (*i)++; 508 (*i)++;
504 return 0; 509 return 0;
505err: 510err:
@@ -633,7 +638,7 @@ static int translate_table(const char *name,
633 /* Finally, each sanity check must pass */ 638 /* Finally, each sanity check must pass */
634 i = 0; 639 i = 0;
635 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, 640 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
636 check_entry, name, size, &i); 641 find_check_entry, name, size, &i);
637 642
638 if (ret != 0) { 643 if (ret != 0) {
639 ARPT_ENTRY_ITERATE(entry0, newinfo->size, 644 ARPT_ENTRY_ITERATE(entry0, newinfo->size,
@@ -704,16 +709,11 @@ static void get_counters(const struct xt_table_info *t,
704 } 709 }
705} 710}
706 711
707static int copy_entries_to_user(unsigned int total_size, 712static inline struct xt_counters *alloc_counters(struct arpt_table *table)
708 struct arpt_table *table,
709 void __user *userptr)
710{ 713{
711 unsigned int off, num, countersize; 714 unsigned int countersize;
712 struct arpt_entry *e;
713 struct xt_counters *counters; 715 struct xt_counters *counters;
714 struct xt_table_info *private = table->private; 716 struct xt_table_info *private = table->private;
715 int ret = 0;
716 void *loc_cpu_entry;
717 717
718 /* We need atomic snapshot of counters: rest doesn't change 718 /* We need atomic snapshot of counters: rest doesn't change
719 * (other than comefrom, which userspace doesn't care 719 * (other than comefrom, which userspace doesn't care
@@ -723,13 +723,31 @@ static int copy_entries_to_user(unsigned int total_size,
723 counters = vmalloc_node(countersize, numa_node_id()); 723 counters = vmalloc_node(countersize, numa_node_id());
724 724
725 if (counters == NULL) 725 if (counters == NULL)
726 return -ENOMEM; 726 return ERR_PTR(-ENOMEM);
727 727
728 /* First, sum counters... */ 728 /* First, sum counters... */
729 write_lock_bh(&table->lock); 729 write_lock_bh(&table->lock);
730 get_counters(private, counters); 730 get_counters(private, counters);
731 write_unlock_bh(&table->lock); 731 write_unlock_bh(&table->lock);
732 732
733 return counters;
734}
735
736static int copy_entries_to_user(unsigned int total_size,
737 struct arpt_table *table,
738 void __user *userptr)
739{
740 unsigned int off, num;
741 struct arpt_entry *e;
742 struct xt_counters *counters;
743 struct xt_table_info *private = table->private;
744 int ret = 0;
745 void *loc_cpu_entry;
746
747 counters = alloc_counters(table);
748 if (IS_ERR(counters))
749 return PTR_ERR(counters);
750
733 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 751 loc_cpu_entry = private->entries[raw_smp_processor_id()];
734 /* ... then copy entire thing ... */ 752 /* ... then copy entire thing ... */
735 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 753 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
@@ -767,23 +785,160 @@ static int copy_entries_to_user(unsigned int total_size,
767 return ret; 785 return ret;
768} 786}
769 787
770static int get_entries(const struct arpt_get_entries *entries, 788#ifdef CONFIG_COMPAT
771 struct arpt_get_entries __user *uptr) 789static void compat_standard_from_user(void *dst, void *src)
790{
791 int v = *(compat_int_t *)src;
792
793 if (v > 0)
794 v += xt_compat_calc_jump(NF_ARP, v);
795 memcpy(dst, &v, sizeof(v));
796}
797
798static int compat_standard_to_user(void __user *dst, void *src)
799{
800 compat_int_t cv = *(int *)src;
801
802 if (cv > 0)
803 cv -= xt_compat_calc_jump(NF_ARP, cv);
804 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
805}
806
807static int compat_calc_entry(struct arpt_entry *e,
808 const struct xt_table_info *info,
809 void *base, struct xt_table_info *newinfo)
810{
811 struct arpt_entry_target *t;
812 unsigned int entry_offset;
813 int off, i, ret;
814
815 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
816 entry_offset = (void *)e - base;
817
818 t = arpt_get_target(e);
819 off += xt_compat_target_offset(t->u.kernel.target);
820 newinfo->size -= off;
821 ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
822 if (ret)
823 return ret;
824
825 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
826 if (info->hook_entry[i] &&
827 (e < (struct arpt_entry *)(base + info->hook_entry[i])))
828 newinfo->hook_entry[i] -= off;
829 if (info->underflow[i] &&
830 (e < (struct arpt_entry *)(base + info->underflow[i])))
831 newinfo->underflow[i] -= off;
832 }
833 return 0;
834}
835
836static int compat_table_info(const struct xt_table_info *info,
837 struct xt_table_info *newinfo)
772{ 838{
839 void *loc_cpu_entry;
840
841 if (!newinfo || !info)
842 return -EINVAL;
843
844 /* we dont care about newinfo->entries[] */
845 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
846 newinfo->initial_entries = 0;
847 loc_cpu_entry = info->entries[raw_smp_processor_id()];
848 return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
849 compat_calc_entry, info, loc_cpu_entry,
850 newinfo);
851}
852#endif
853
854static int get_info(struct net *net, void __user *user, int *len, int compat)
855{
856 char name[ARPT_TABLE_MAXNAMELEN];
857 struct arpt_table *t;
773 int ret; 858 int ret;
859
860 if (*len != sizeof(struct arpt_getinfo)) {
861 duprintf("length %u != %Zu\n", *len,
862 sizeof(struct arpt_getinfo));
863 return -EINVAL;
864 }
865
866 if (copy_from_user(name, user, sizeof(name)) != 0)
867 return -EFAULT;
868
869 name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
870#ifdef CONFIG_COMPAT
871 if (compat)
872 xt_compat_lock(NF_ARP);
873#endif
874 t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
875 "arptable_%s", name);
876 if (t && !IS_ERR(t)) {
877 struct arpt_getinfo info;
878 struct xt_table_info *private = t->private;
879
880#ifdef CONFIG_COMPAT
881 if (compat) {
882 struct xt_table_info tmp;
883 ret = compat_table_info(private, &tmp);
884 xt_compat_flush_offsets(NF_ARP);
885 private = &tmp;
886 }
887#endif
888 info.valid_hooks = t->valid_hooks;
889 memcpy(info.hook_entry, private->hook_entry,
890 sizeof(info.hook_entry));
891 memcpy(info.underflow, private->underflow,
892 sizeof(info.underflow));
893 info.num_entries = private->number;
894 info.size = private->size;
895 strcpy(info.name, name);
896
897 if (copy_to_user(user, &info, *len) != 0)
898 ret = -EFAULT;
899 else
900 ret = 0;
901 xt_table_unlock(t);
902 module_put(t->me);
903 } else
904 ret = t ? PTR_ERR(t) : -ENOENT;
905#ifdef CONFIG_COMPAT
906 if (compat)
907 xt_compat_unlock(NF_ARP);
908#endif
909 return ret;
910}
911
912static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
913 int *len)
914{
915 int ret;
916 struct arpt_get_entries get;
774 struct arpt_table *t; 917 struct arpt_table *t;
775 918
776 t = xt_find_table_lock(NF_ARP, entries->name); 919 if (*len < sizeof(get)) {
920 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
921 return -EINVAL;
922 }
923 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
924 return -EFAULT;
925 if (*len != sizeof(struct arpt_get_entries) + get.size) {
926 duprintf("get_entries: %u != %Zu\n", *len,
927 sizeof(struct arpt_get_entries) + get.size);
928 return -EINVAL;
929 }
930
931 t = xt_find_table_lock(net, NF_ARP, get.name);
777 if (t && !IS_ERR(t)) { 932 if (t && !IS_ERR(t)) {
778 struct xt_table_info *private = t->private; 933 struct xt_table_info *private = t->private;
779 duprintf("t->private->number = %u\n", 934 duprintf("t->private->number = %u\n",
780 private->number); 935 private->number);
781 if (entries->size == private->size) 936 if (get.size == private->size)
782 ret = copy_entries_to_user(private->size, 937 ret = copy_entries_to_user(private->size,
783 t, uptr->entrytable); 938 t, uptr->entrytable);
784 else { 939 else {
785 duprintf("get_entries: I've got %u not %u!\n", 940 duprintf("get_entries: I've got %u not %u!\n",
786 private->size, entries->size); 941 private->size, get.size);
787 ret = -EINVAL; 942 ret = -EINVAL;
788 } 943 }
789 module_put(t->me); 944 module_put(t->me);
@@ -794,71 +949,42 @@ static int get_entries(const struct arpt_get_entries *entries,
794 return ret; 949 return ret;
795} 950}
796 951
797static int do_replace(void __user *user, unsigned int len) 952static int __do_replace(struct net *net, const char *name,
953 unsigned int valid_hooks,
954 struct xt_table_info *newinfo,
955 unsigned int num_counters,
956 void __user *counters_ptr)
798{ 957{
799 int ret; 958 int ret;
800 struct arpt_replace tmp;
801 struct arpt_table *t; 959 struct arpt_table *t;
802 struct xt_table_info *newinfo, *oldinfo; 960 struct xt_table_info *oldinfo;
803 struct xt_counters *counters; 961 struct xt_counters *counters;
804 void *loc_cpu_entry, *loc_cpu_old_entry; 962 void *loc_cpu_old_entry;
805
806 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
807 return -EFAULT;
808
809 /* Hack: Causes ipchains to give correct error msg --RR */
810 if (len != sizeof(tmp) + tmp.size)
811 return -ENOPROTOOPT;
812
813 /* overflow check */
814 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
815 SMP_CACHE_BYTES)
816 return -ENOMEM;
817 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
818 return -ENOMEM;
819
820 newinfo = xt_alloc_table_info(tmp.size);
821 if (!newinfo)
822 return -ENOMEM;
823
824 /* choose the copy that is on our node/cpu */
825 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
826 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
827 tmp.size) != 0) {
828 ret = -EFAULT;
829 goto free_newinfo;
830 }
831 963
832 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); 964 ret = 0;
965 counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
966 numa_node_id());
833 if (!counters) { 967 if (!counters) {
834 ret = -ENOMEM; 968 ret = -ENOMEM;
835 goto free_newinfo; 969 goto out;
836 } 970 }
837 971
838 ret = translate_table(tmp.name, tmp.valid_hooks, 972 t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
839 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, 973 "arptable_%s", name);
840 tmp.hook_entry, tmp.underflow);
841 if (ret != 0)
842 goto free_newinfo_counters;
843
844 duprintf("arp_tables: Translated table\n");
845
846 t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
847 "arptable_%s", tmp.name);
848 if (!t || IS_ERR(t)) { 974 if (!t || IS_ERR(t)) {
849 ret = t ? PTR_ERR(t) : -ENOENT; 975 ret = t ? PTR_ERR(t) : -ENOENT;
850 goto free_newinfo_counters_untrans; 976 goto free_newinfo_counters_untrans;
851 } 977 }
852 978
853 /* You lied! */ 979 /* You lied! */
854 if (tmp.valid_hooks != t->valid_hooks) { 980 if (valid_hooks != t->valid_hooks) {
855 duprintf("Valid hook crap: %08X vs %08X\n", 981 duprintf("Valid hook crap: %08X vs %08X\n",
856 tmp.valid_hooks, t->valid_hooks); 982 valid_hooks, t->valid_hooks);
857 ret = -EINVAL; 983 ret = -EINVAL;
858 goto put_module; 984 goto put_module;
859 } 985 }
860 986
861 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); 987 oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
862 if (!oldinfo) 988 if (!oldinfo)
863 goto put_module; 989 goto put_module;
864 990
@@ -876,11 +1002,12 @@ static int do_replace(void __user *user, unsigned int len)
876 get_counters(oldinfo, counters); 1002 get_counters(oldinfo, counters);
877 /* Decrease module usage counts and free resource */ 1003 /* Decrease module usage counts and free resource */
878 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1004 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
879 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); 1005 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1006 NULL);
880 1007
881 xt_free_table_info(oldinfo); 1008 xt_free_table_info(oldinfo);
882 if (copy_to_user(tmp.counters, counters, 1009 if (copy_to_user(counters_ptr, counters,
883 sizeof(struct xt_counters) * tmp.num_counters) != 0) 1010 sizeof(struct xt_counters) * num_counters) != 0)
884 ret = -EFAULT; 1011 ret = -EFAULT;
885 vfree(counters); 1012 vfree(counters);
886 xt_table_unlock(t); 1013 xt_table_unlock(t);
@@ -890,9 +1017,53 @@ static int do_replace(void __user *user, unsigned int len)
890 module_put(t->me); 1017 module_put(t->me);
891 xt_table_unlock(t); 1018 xt_table_unlock(t);
892 free_newinfo_counters_untrans: 1019 free_newinfo_counters_untrans:
893 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
894 free_newinfo_counters:
895 vfree(counters); 1020 vfree(counters);
1021 out:
1022 return ret;
1023}
1024
1025static int do_replace(struct net *net, void __user *user, unsigned int len)
1026{
1027 int ret;
1028 struct arpt_replace tmp;
1029 struct xt_table_info *newinfo;
1030 void *loc_cpu_entry;
1031
1032 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1033 return -EFAULT;
1034
1035 /* overflow check */
1036 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1037 return -ENOMEM;
1038
1039 newinfo = xt_alloc_table_info(tmp.size);
1040 if (!newinfo)
1041 return -ENOMEM;
1042
1043 /* choose the copy that is on our node/cpu */
1044 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1045 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1046 tmp.size) != 0) {
1047 ret = -EFAULT;
1048 goto free_newinfo;
1049 }
1050
1051 ret = translate_table(tmp.name, tmp.valid_hooks,
1052 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1053 tmp.hook_entry, tmp.underflow);
1054 if (ret != 0)
1055 goto free_newinfo;
1056
1057 duprintf("arp_tables: Translated table\n");
1058
1059 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1060 tmp.num_counters, tmp.counters);
1061 if (ret)
1062 goto free_newinfo_untrans;
1063 return 0;
1064
1065 free_newinfo_untrans:
1066 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
896 free_newinfo: 1067 free_newinfo:
897 xt_free_table_info(newinfo); 1068 xt_free_table_info(newinfo);
898 return ret; 1069 return ret;
@@ -912,31 +1083,60 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
912 return 0; 1083 return 0;
913} 1084}
914 1085
915static int do_add_counters(void __user *user, unsigned int len) 1086static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1087 int compat)
916{ 1088{
917 unsigned int i; 1089 unsigned int i;
918 struct xt_counters_info tmp, *paddc; 1090 struct xt_counters_info tmp;
1091 struct xt_counters *paddc;
1092 unsigned int num_counters;
1093 char *name;
1094 int size;
1095 void *ptmp;
919 struct arpt_table *t; 1096 struct arpt_table *t;
920 struct xt_table_info *private; 1097 struct xt_table_info *private;
921 int ret = 0; 1098 int ret = 0;
922 void *loc_cpu_entry; 1099 void *loc_cpu_entry;
1100#ifdef CONFIG_COMPAT
1101 struct compat_xt_counters_info compat_tmp;
923 1102
924 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1103 if (compat) {
1104 ptmp = &compat_tmp;
1105 size = sizeof(struct compat_xt_counters_info);
1106 } else
1107#endif
1108 {
1109 ptmp = &tmp;
1110 size = sizeof(struct xt_counters_info);
1111 }
1112
1113 if (copy_from_user(ptmp, user, size) != 0)
925 return -EFAULT; 1114 return -EFAULT;
926 1115
927 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) 1116#ifdef CONFIG_COMPAT
1117 if (compat) {
1118 num_counters = compat_tmp.num_counters;
1119 name = compat_tmp.name;
1120 } else
1121#endif
1122 {
1123 num_counters = tmp.num_counters;
1124 name = tmp.name;
1125 }
1126
1127 if (len != size + num_counters * sizeof(struct xt_counters))
928 return -EINVAL; 1128 return -EINVAL;
929 1129
930 paddc = vmalloc(len); 1130 paddc = vmalloc_node(len - size, numa_node_id());
931 if (!paddc) 1131 if (!paddc)
932 return -ENOMEM; 1132 return -ENOMEM;
933 1133
934 if (copy_from_user(paddc, user, len) != 0) { 1134 if (copy_from_user(paddc, user + size, len - size) != 0) {
935 ret = -EFAULT; 1135 ret = -EFAULT;
936 goto free; 1136 goto free;
937 } 1137 }
938 1138
939 t = xt_find_table_lock(NF_ARP, tmp.name); 1139 t = xt_find_table_lock(net, NF_ARP, name);
940 if (!t || IS_ERR(t)) { 1140 if (!t || IS_ERR(t)) {
941 ret = t ? PTR_ERR(t) : -ENOENT; 1141 ret = t ? PTR_ERR(t) : -ENOENT;
942 goto free; 1142 goto free;
@@ -944,7 +1144,7 @@ static int do_add_counters(void __user *user, unsigned int len)
944 1144
945 write_lock_bh(&t->lock); 1145 write_lock_bh(&t->lock);
946 private = t->private; 1146 private = t->private;
947 if (private->number != tmp.num_counters) { 1147 if (private->number != num_counters) {
948 ret = -EINVAL; 1148 ret = -EINVAL;
949 goto unlock_up_free; 1149 goto unlock_up_free;
950 } 1150 }
@@ -955,7 +1155,7 @@ static int do_add_counters(void __user *user, unsigned int len)
955 ARPT_ENTRY_ITERATE(loc_cpu_entry, 1155 ARPT_ENTRY_ITERATE(loc_cpu_entry,
956 private->size, 1156 private->size,
957 add_counter_to_entry, 1157 add_counter_to_entry,
958 paddc->counters, 1158 paddc,
959 &i); 1159 &i);
960 unlock_up_free: 1160 unlock_up_free:
961 write_unlock_bh(&t->lock); 1161 write_unlock_bh(&t->lock);
@@ -967,7 +1167,330 @@ static int do_add_counters(void __user *user, unsigned int len)
967 return ret; 1167 return ret;
968} 1168}
969 1169
970static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1170#ifdef CONFIG_COMPAT
1171static inline int
1172compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
1173{
1174 struct arpt_entry_target *t;
1175
1176 if (i && (*i)-- == 0)
1177 return 1;
1178
1179 t = compat_arpt_get_target(e);
1180 module_put(t->u.kernel.target->me);
1181 return 0;
1182}
1183
1184static inline int
1185check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1186 struct xt_table_info *newinfo,
1187 unsigned int *size,
1188 unsigned char *base,
1189 unsigned char *limit,
1190 unsigned int *hook_entries,
1191 unsigned int *underflows,
1192 unsigned int *i,
1193 const char *name)
1194{
1195 struct arpt_entry_target *t;
1196 struct xt_target *target;
1197 unsigned int entry_offset;
1198 int ret, off, h;
1199
1200 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1201 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0
1202 || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
1203 duprintf("Bad offset %p, limit = %p\n", e, limit);
1204 return -EINVAL;
1205 }
1206
1207 if (e->next_offset < sizeof(struct compat_arpt_entry) +
1208 sizeof(struct compat_xt_entry_target)) {
1209 duprintf("checking: element %p size %u\n",
1210 e, e->next_offset);
1211 return -EINVAL;
1212 }
1213
1214 /* For purposes of check_entry casting the compat entry is fine */
1215 ret = check_entry((struct arpt_entry *)e, name);
1216 if (ret)
1217 return ret;
1218
1219 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1220 entry_offset = (void *)e - (void *)base;
1221
1222 t = compat_arpt_get_target(e);
1223 target = try_then_request_module(xt_find_target(NF_ARP,
1224 t->u.user.name,
1225 t->u.user.revision),
1226 "arpt_%s", t->u.user.name);
1227 if (IS_ERR(target) || !target) {
1228 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1229 t->u.user.name);
1230 ret = target ? PTR_ERR(target) : -ENOENT;
1231 goto out;
1232 }
1233 t->u.kernel.target = target;
1234
1235 off += xt_compat_target_offset(target);
1236 *size += off;
1237 ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
1238 if (ret)
1239 goto release_target;
1240
1241 /* Check hooks & underflows */
1242 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
1243 if ((unsigned char *)e - base == hook_entries[h])
1244 newinfo->hook_entry[h] = hook_entries[h];
1245 if ((unsigned char *)e - base == underflows[h])
1246 newinfo->underflow[h] = underflows[h];
1247 }
1248
1249 /* Clear counters and comefrom */
1250 memset(&e->counters, 0, sizeof(e->counters));
1251 e->comefrom = 0;
1252
1253 (*i)++;
1254 return 0;
1255
1256release_target:
1257 module_put(t->u.kernel.target->me);
1258out:
1259 return ret;
1260}
1261
1262static int
1263compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1264 unsigned int *size, const char *name,
1265 struct xt_table_info *newinfo, unsigned char *base)
1266{
1267 struct arpt_entry_target *t;
1268 struct xt_target *target;
1269 struct arpt_entry *de;
1270 unsigned int origsize;
1271 int ret, h;
1272
1273 ret = 0;
1274 origsize = *size;
1275 de = (struct arpt_entry *)*dstptr;
1276 memcpy(de, e, sizeof(struct arpt_entry));
1277 memcpy(&de->counters, &e->counters, sizeof(e->counters));
1278
1279 *dstptr += sizeof(struct arpt_entry);
1280 *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1281
1282 de->target_offset = e->target_offset - (origsize - *size);
1283 t = compat_arpt_get_target(e);
1284 target = t->u.kernel.target;
1285 xt_compat_target_from_user(t, dstptr, size);
1286
1287 de->next_offset = e->next_offset - (origsize - *size);
1288 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
1289 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1290 newinfo->hook_entry[h] -= origsize - *size;
1291 if ((unsigned char *)de - base < newinfo->underflow[h])
1292 newinfo->underflow[h] -= origsize - *size;
1293 }
1294 return ret;
1295}
1296
1297static inline int compat_check_entry(struct arpt_entry *e, const char *name,
1298 unsigned int *i)
1299{
1300 int ret;
1301
1302 ret = check_target(e, name);
1303 if (ret)
1304 return ret;
1305
1306 (*i)++;
1307 return 0;
1308}
1309
1310static int translate_compat_table(const char *name,
1311 unsigned int valid_hooks,
1312 struct xt_table_info **pinfo,
1313 void **pentry0,
1314 unsigned int total_size,
1315 unsigned int number,
1316 unsigned int *hook_entries,
1317 unsigned int *underflows)
1318{
1319 unsigned int i, j;
1320 struct xt_table_info *newinfo, *info;
1321 void *pos, *entry0, *entry1;
1322 unsigned int size;
1323 int ret;
1324
1325 info = *pinfo;
1326 entry0 = *pentry0;
1327 size = total_size;
1328 info->number = number;
1329
1330 /* Init all hooks to impossible value. */
1331 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1332 info->hook_entry[i] = 0xFFFFFFFF;
1333 info->underflow[i] = 0xFFFFFFFF;
1334 }
1335
1336 duprintf("translate_compat_table: size %u\n", info->size);
1337 j = 0;
1338 xt_compat_lock(NF_ARP);
1339 /* Walk through entries, checking offsets. */
1340 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
1341 check_compat_entry_size_and_hooks,
1342 info, &size, entry0,
1343 entry0 + total_size,
1344 hook_entries, underflows, &j, name);
1345 if (ret != 0)
1346 goto out_unlock;
1347
1348 ret = -EINVAL;
1349 if (j != number) {
1350 duprintf("translate_compat_table: %u not %u entries\n",
1351 j, number);
1352 goto out_unlock;
1353 }
1354
1355 /* Check hooks all assigned */
1356 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1357 /* Only hooks which are valid */
1358 if (!(valid_hooks & (1 << i)))
1359 continue;
1360 if (info->hook_entry[i] == 0xFFFFFFFF) {
1361 duprintf("Invalid hook entry %u %u\n",
1362 i, hook_entries[i]);
1363 goto out_unlock;
1364 }
1365 if (info->underflow[i] == 0xFFFFFFFF) {
1366 duprintf("Invalid underflow %u %u\n",
1367 i, underflows[i]);
1368 goto out_unlock;
1369 }
1370 }
1371
1372 ret = -ENOMEM;
1373 newinfo = xt_alloc_table_info(size);
1374 if (!newinfo)
1375 goto out_unlock;
1376
1377 newinfo->number = number;
1378 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1379 newinfo->hook_entry[i] = info->hook_entry[i];
1380 newinfo->underflow[i] = info->underflow[i];
1381 }
1382 entry1 = newinfo->entries[raw_smp_processor_id()];
1383 pos = entry1;
1384 size = total_size;
1385 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
1386 compat_copy_entry_from_user,
1387 &pos, &size, name, newinfo, entry1);
1388 xt_compat_flush_offsets(NF_ARP);
1389 xt_compat_unlock(NF_ARP);
1390 if (ret)
1391 goto free_newinfo;
1392
1393 ret = -ELOOP;
1394 if (!mark_source_chains(newinfo, valid_hooks, entry1))
1395 goto free_newinfo;
1396
1397 i = 0;
1398 ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1399 name, &i);
1400 if (ret) {
1401 j -= i;
1402 COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1403 compat_release_entry, &j);
1404 ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1405 xt_free_table_info(newinfo);
1406 return ret;
1407 }
1408
1409 /* And one copy for every other CPU */
1410 for_each_possible_cpu(i)
1411 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1412 memcpy(newinfo->entries[i], entry1, newinfo->size);
1413
1414 *pinfo = newinfo;
1415 *pentry0 = entry1;
1416 xt_free_table_info(info);
1417 return 0;
1418
1419free_newinfo:
1420 xt_free_table_info(newinfo);
1421out:
1422 COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1423 return ret;
1424out_unlock:
1425 xt_compat_flush_offsets(NF_ARP);
1426 xt_compat_unlock(NF_ARP);
1427 goto out;
1428}
1429
1430struct compat_arpt_replace {
1431 char name[ARPT_TABLE_MAXNAMELEN];
1432 u32 valid_hooks;
1433 u32 num_entries;
1434 u32 size;
1435 u32 hook_entry[NF_ARP_NUMHOOKS];
1436 u32 underflow[NF_ARP_NUMHOOKS];
1437 u32 num_counters;
1438 compat_uptr_t counters;
1439 struct compat_arpt_entry entries[0];
1440};
1441
1442static int compat_do_replace(struct net *net, void __user *user,
1443 unsigned int len)
1444{
1445 int ret;
1446 struct compat_arpt_replace tmp;
1447 struct xt_table_info *newinfo;
1448 void *loc_cpu_entry;
1449
1450 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1451 return -EFAULT;
1452
1453 /* overflow check */
1454 if (tmp.size >= INT_MAX / num_possible_cpus())
1455 return -ENOMEM;
1456 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1457 return -ENOMEM;
1458
1459 newinfo = xt_alloc_table_info(tmp.size);
1460 if (!newinfo)
1461 return -ENOMEM;
1462
1463 /* choose the copy that is on our node/cpu */
1464 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1465 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
1466 ret = -EFAULT;
1467 goto free_newinfo;
1468 }
1469
1470 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1471 &newinfo, &loc_cpu_entry, tmp.size,
1472 tmp.num_entries, tmp.hook_entry,
1473 tmp.underflow);
1474 if (ret != 0)
1475 goto free_newinfo;
1476
1477 duprintf("compat_do_replace: Translated table\n");
1478
1479 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1480 tmp.num_counters, compat_ptr(tmp.counters));
1481 if (ret)
1482 goto free_newinfo_untrans;
1483 return 0;
1484
1485 free_newinfo_untrans:
1486 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1487 free_newinfo:
1488 xt_free_table_info(newinfo);
1489 return ret;
1490}
1491
1492static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1493 unsigned int len)
971{ 1494{
972 int ret; 1495 int ret;
973 1496
@@ -976,11 +1499,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
976 1499
977 switch (cmd) { 1500 switch (cmd) {
978 case ARPT_SO_SET_REPLACE: 1501 case ARPT_SO_SET_REPLACE:
979 ret = do_replace(user, len); 1502 ret = compat_do_replace(sk->sk_net, user, len);
980 break; 1503 break;
981 1504
982 case ARPT_SO_SET_ADD_COUNTERS: 1505 case ARPT_SO_SET_ADD_COUNTERS:
983 ret = do_add_counters(user, len); 1506 ret = do_add_counters(sk->sk_net, user, len, 1);
984 break; 1507 break;
985 1508
986 default: 1509 default:
@@ -991,74 +1514,191 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
991 return ret; 1514 return ret;
992} 1515}
993 1516
994static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 1517static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1518 compat_uint_t *size,
1519 struct xt_counters *counters,
1520 unsigned int *i)
995{ 1521{
1522 struct arpt_entry_target *t;
1523 struct compat_arpt_entry __user *ce;
1524 u_int16_t target_offset, next_offset;
1525 compat_uint_t origsize;
996 int ret; 1526 int ret;
997 1527
998 if (!capable(CAP_NET_ADMIN)) 1528 ret = -EFAULT;
999 return -EPERM; 1529 origsize = *size;
1530 ce = (struct compat_arpt_entry __user *)*dstptr;
1531 if (copy_to_user(ce, e, sizeof(struct arpt_entry)))
1532 goto out;
1000 1533
1001 switch (cmd) { 1534 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1002 case ARPT_SO_GET_INFO: { 1535 goto out;
1003 char name[ARPT_TABLE_MAXNAMELEN]; 1536
1004 struct arpt_table *t; 1537 *dstptr += sizeof(struct compat_arpt_entry);
1538 *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1539
1540 target_offset = e->target_offset - (origsize - *size);
1541
1542 t = arpt_get_target(e);
1543 ret = xt_compat_target_to_user(t, dstptr, size);
1544 if (ret)
1545 goto out;
1546 ret = -EFAULT;
1547 next_offset = e->next_offset - (origsize - *size);
1548 if (put_user(target_offset, &ce->target_offset))
1549 goto out;
1550 if (put_user(next_offset, &ce->next_offset))
1551 goto out;
1552
1553 (*i)++;
1554 return 0;
1555out:
1556 return ret;
1557}
1558
1559static int compat_copy_entries_to_user(unsigned int total_size,
1560 struct arpt_table *table,
1561 void __user *userptr)
1562{
1563 struct xt_counters *counters;
1564 struct xt_table_info *private = table->private;
1565 void __user *pos;
1566 unsigned int size;
1567 int ret = 0;
1568 void *loc_cpu_entry;
1569 unsigned int i = 0;
1570
1571 counters = alloc_counters(table);
1572 if (IS_ERR(counters))
1573 return PTR_ERR(counters);
1574
1575 /* choose the copy on our node/cpu */
1576 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1577 pos = userptr;
1578 size = total_size;
1579 ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1580 compat_copy_entry_to_user,
1581 &pos, &size, counters, &i);
1582 vfree(counters);
1583 return ret;
1584}
1585
1586struct compat_arpt_get_entries {
1587 char name[ARPT_TABLE_MAXNAMELEN];
1588 compat_uint_t size;
1589 struct compat_arpt_entry entrytable[0];
1590};
1005 1591
1006 if (*len != sizeof(struct arpt_getinfo)) { 1592static int compat_get_entries(struct net *net,
1007 duprintf("length %u != %Zu\n", *len, 1593 struct compat_arpt_get_entries __user *uptr,
1008 sizeof(struct arpt_getinfo)); 1594 int *len)
1595{
1596 int ret;
1597 struct compat_arpt_get_entries get;
1598 struct arpt_table *t;
1599
1600 if (*len < sizeof(get)) {
1601 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1602 return -EINVAL;
1603 }
1604 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1605 return -EFAULT;
1606 if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
1607 duprintf("compat_get_entries: %u != %zu\n",
1608 *len, sizeof(get) + get.size);
1609 return -EINVAL;
1610 }
1611
1612 xt_compat_lock(NF_ARP);
1613 t = xt_find_table_lock(net, NF_ARP, get.name);
1614 if (t && !IS_ERR(t)) {
1615 struct xt_table_info *private = t->private;
1616 struct xt_table_info info;
1617
1618 duprintf("t->private->number = %u\n", private->number);
1619 ret = compat_table_info(private, &info);
1620 if (!ret && get.size == info.size) {
1621 ret = compat_copy_entries_to_user(private->size,
1622 t, uptr->entrytable);
1623 } else if (!ret) {
1624 duprintf("compat_get_entries: I've got %u not %u!\n",
1625 private->size, get.size);
1009 ret = -EINVAL; 1626 ret = -EINVAL;
1010 break;
1011 } 1627 }
1628 xt_compat_flush_offsets(NF_ARP);
1629 module_put(t->me);
1630 xt_table_unlock(t);
1631 } else
1632 ret = t ? PTR_ERR(t) : -ENOENT;
1012 1633
1013 if (copy_from_user(name, user, sizeof(name)) != 0) { 1634 xt_compat_unlock(NF_ARP);
1014 ret = -EFAULT; 1635 return ret;
1015 break; 1636}
1016 } 1637
1017 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 1638static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
1018 1639
1019 t = try_then_request_module(xt_find_table_lock(NF_ARP, name), 1640static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1020 "arptable_%s", name); 1641 int *len)
1021 if (t && !IS_ERR(t)) { 1642{
1022 struct arpt_getinfo info; 1643 int ret;
1023 struct xt_table_info *private = t->private; 1644
1024 1645 if (!capable(CAP_NET_ADMIN))
1025 info.valid_hooks = t->valid_hooks; 1646 return -EPERM;
1026 memcpy(info.hook_entry, private->hook_entry, 1647
1027 sizeof(info.hook_entry)); 1648 switch (cmd) {
1028 memcpy(info.underflow, private->underflow, 1649 case ARPT_SO_GET_INFO:
1029 sizeof(info.underflow)); 1650 ret = get_info(sk->sk_net, user, len, 1);
1030 info.num_entries = private->number; 1651 break;
1031 info.size = private->size; 1652 case ARPT_SO_GET_ENTRIES:
1032 strcpy(info.name, name); 1653 ret = compat_get_entries(sk->sk_net, user, len);
1033 1654 break;
1034 if (copy_to_user(user, &info, *len) != 0) 1655 default:
1035 ret = -EFAULT; 1656 ret = do_arpt_get_ctl(sk, cmd, user, len);
1036 else
1037 ret = 0;
1038 xt_table_unlock(t);
1039 module_put(t->me);
1040 } else
1041 ret = t ? PTR_ERR(t) : -ENOENT;
1042 } 1657 }
1043 break; 1658 return ret;
1659}
1660#endif
1661
1662static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1663{
1664 int ret;
1044 1665
1045 case ARPT_SO_GET_ENTRIES: { 1666 if (!capable(CAP_NET_ADMIN))
1046 struct arpt_get_entries get; 1667 return -EPERM;
1047 1668
1048 if (*len < sizeof(get)) { 1669 switch (cmd) {
1049 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); 1670 case ARPT_SO_SET_REPLACE:
1050 ret = -EINVAL; 1671 ret = do_replace(sk->sk_net, user, len);
1051 } else if (copy_from_user(&get, user, sizeof(get)) != 0) { 1672 break;
1052 ret = -EFAULT; 1673
1053 } else if (*len != sizeof(struct arpt_get_entries) + get.size) { 1674 case ARPT_SO_SET_ADD_COUNTERS:
1054 duprintf("get_entries: %u != %Zu\n", *len, 1675 ret = do_add_counters(sk->sk_net, user, len, 0);
1055 sizeof(struct arpt_get_entries) + get.size);
1056 ret = -EINVAL;
1057 } else
1058 ret = get_entries(&get, user);
1059 break; 1676 break;
1677
1678 default:
1679 duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
1680 ret = -EINVAL;
1060 } 1681 }
1061 1682
1683 return ret;
1684}
1685
1686static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1687{
1688 int ret;
1689
1690 if (!capable(CAP_NET_ADMIN))
1691 return -EPERM;
1692
1693 switch (cmd) {
1694 case ARPT_SO_GET_INFO:
1695 ret = get_info(sk->sk_net, user, len, 0);
1696 break;
1697
1698 case ARPT_SO_GET_ENTRIES:
1699 ret = get_entries(sk->sk_net, user, len);
1700 break;
1701
1062 case ARPT_SO_GET_REVISION_TARGET: { 1702 case ARPT_SO_GET_REVISION_TARGET: {
1063 struct xt_get_revision rev; 1703 struct xt_get_revision rev;
1064 1704
@@ -1085,19 +1725,21 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1085 return ret; 1725 return ret;
1086} 1726}
1087 1727
1088int arpt_register_table(struct arpt_table *table, 1728struct arpt_table *arpt_register_table(struct net *net,
1089 const struct arpt_replace *repl) 1729 struct arpt_table *table,
1730 const struct arpt_replace *repl)
1090{ 1731{
1091 int ret; 1732 int ret;
1092 struct xt_table_info *newinfo; 1733 struct xt_table_info *newinfo;
1093 static struct xt_table_info bootstrap 1734 struct xt_table_info bootstrap
1094 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1735 = { 0, 0, 0, { 0 }, { 0 }, { } };
1095 void *loc_cpu_entry; 1736 void *loc_cpu_entry;
1737 struct xt_table *new_table;
1096 1738
1097 newinfo = xt_alloc_table_info(repl->size); 1739 newinfo = xt_alloc_table_info(repl->size);
1098 if (!newinfo) { 1740 if (!newinfo) {
1099 ret = -ENOMEM; 1741 ret = -ENOMEM;
1100 return ret; 1742 goto out;
1101 } 1743 }
1102 1744
1103 /* choose the copy on our node/cpu */ 1745 /* choose the copy on our node/cpu */
@@ -1111,24 +1753,27 @@ int arpt_register_table(struct arpt_table *table,
1111 repl->underflow); 1753 repl->underflow);
1112 1754
1113 duprintf("arpt_register_table: translate table gives %d\n", ret); 1755 duprintf("arpt_register_table: translate table gives %d\n", ret);
1114 if (ret != 0) { 1756 if (ret != 0)
1115 xt_free_table_info(newinfo); 1757 goto out_free;
1116 return ret;
1117 }
1118 1758
1119 ret = xt_register_table(table, &bootstrap, newinfo); 1759 new_table = xt_register_table(net, table, &bootstrap, newinfo);
1120 if (ret != 0) { 1760 if (IS_ERR(new_table)) {
1121 xt_free_table_info(newinfo); 1761 ret = PTR_ERR(new_table);
1122 return ret; 1762 goto out_free;
1123 } 1763 }
1764 return new_table;
1124 1765
1125 return 0; 1766out_free:
1767 xt_free_table_info(newinfo);
1768out:
1769 return ERR_PTR(ret);
1126} 1770}
1127 1771
1128void arpt_unregister_table(struct arpt_table *table) 1772void arpt_unregister_table(struct arpt_table *table)
1129{ 1773{
1130 struct xt_table_info *private; 1774 struct xt_table_info *private;
1131 void *loc_cpu_entry; 1775 void *loc_cpu_entry;
1776 struct module *table_owner = table->me;
1132 1777
1133 private = xt_unregister_table(table); 1778 private = xt_unregister_table(table);
1134 1779
@@ -1136,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table)
1136 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1781 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1137 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, 1782 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
1138 cleanup_entry, NULL); 1783 cleanup_entry, NULL);
1784 if (private->number > private->initial_entries)
1785 module_put(table_owner);
1139 xt_free_table_info(private); 1786 xt_free_table_info(private);
1140} 1787}
1141 1788
@@ -1144,6 +1791,11 @@ static struct arpt_target arpt_standard_target __read_mostly = {
1144 .name = ARPT_STANDARD_TARGET, 1791 .name = ARPT_STANDARD_TARGET,
1145 .targetsize = sizeof(int), 1792 .targetsize = sizeof(int),
1146 .family = NF_ARP, 1793 .family = NF_ARP,
1794#ifdef CONFIG_COMPAT
1795 .compatsize = sizeof(compat_int_t),
1796 .compat_from_user = compat_standard_from_user,
1797 .compat_to_user = compat_standard_to_user,
1798#endif
1147}; 1799};
1148 1800
1149static struct arpt_target arpt_error_target __read_mostly = { 1801static struct arpt_target arpt_error_target __read_mostly = {
@@ -1158,17 +1810,38 @@ static struct nf_sockopt_ops arpt_sockopts = {
1158 .set_optmin = ARPT_BASE_CTL, 1810 .set_optmin = ARPT_BASE_CTL,
1159 .set_optmax = ARPT_SO_SET_MAX+1, 1811 .set_optmax = ARPT_SO_SET_MAX+1,
1160 .set = do_arpt_set_ctl, 1812 .set = do_arpt_set_ctl,
1813#ifdef CONFIG_COMPAT
1814 .compat_set = compat_do_arpt_set_ctl,
1815#endif
1161 .get_optmin = ARPT_BASE_CTL, 1816 .get_optmin = ARPT_BASE_CTL,
1162 .get_optmax = ARPT_SO_GET_MAX+1, 1817 .get_optmax = ARPT_SO_GET_MAX+1,
1163 .get = do_arpt_get_ctl, 1818 .get = do_arpt_get_ctl,
1819#ifdef CONFIG_COMPAT
1820 .compat_get = compat_do_arpt_get_ctl,
1821#endif
1164 .owner = THIS_MODULE, 1822 .owner = THIS_MODULE,
1165}; 1823};
1166 1824
1825static int __net_init arp_tables_net_init(struct net *net)
1826{
1827 return xt_proto_init(net, NF_ARP);
1828}
1829
1830static void __net_exit arp_tables_net_exit(struct net *net)
1831{
1832 xt_proto_fini(net, NF_ARP);
1833}
1834
1835static struct pernet_operations arp_tables_net_ops = {
1836 .init = arp_tables_net_init,
1837 .exit = arp_tables_net_exit,
1838};
1839
1167static int __init arp_tables_init(void) 1840static int __init arp_tables_init(void)
1168{ 1841{
1169 int ret; 1842 int ret;
1170 1843
1171 ret = xt_proto_init(NF_ARP); 1844 ret = register_pernet_subsys(&arp_tables_net_ops);
1172 if (ret < 0) 1845 if (ret < 0)
1173 goto err1; 1846 goto err1;
1174 1847
@@ -1193,7 +1866,7 @@ err4:
1193err3: 1866err3:
1194 xt_unregister_target(&arpt_standard_target); 1867 xt_unregister_target(&arpt_standard_target);
1195err2: 1868err2:
1196 xt_proto_fini(NF_ARP); 1869 unregister_pernet_subsys(&arp_tables_net_ops);
1197err1: 1870err1:
1198 return ret; 1871 return ret;
1199} 1872}
@@ -1203,7 +1876,7 @@ static void __exit arp_tables_fini(void)
1203 nf_unregister_sockopt(&arpt_sockopts); 1876 nf_unregister_sockopt(&arpt_sockopts);
1204 xt_unregister_target(&arpt_error_target); 1877 xt_unregister_target(&arpt_error_target);
1205 xt_unregister_target(&arpt_standard_target); 1878 xt_unregister_target(&arpt_standard_target);
1206 xt_proto_fini(NF_ARP); 1879 unregister_pernet_subsys(&arp_tables_net_ops);
1207} 1880}
1208 1881
1209EXPORT_SYMBOL(arpt_register_table); 1882EXPORT_SYMBOL(arpt_register_table);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 302d3da5f696..4e9c496a30c2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -20,7 +20,7 @@ static struct
20 struct arpt_replace repl; 20 struct arpt_replace repl;
21 struct arpt_standard entries[3]; 21 struct arpt_standard entries[3];
22 struct arpt_error term; 22 struct arpt_error term;
23} initial_table __initdata = { 23} initial_table __net_initdata = {
24 .repl = { 24 .repl = {
25 .name = "filter", 25 .name = "filter",
26 .valid_hooks = FILTER_VALID_HOOKS, 26 .valid_hooks = FILTER_VALID_HOOKS,
@@ -61,10 +61,10 @@ static unsigned int arpt_hook(unsigned int hook,
61 const struct net_device *out, 61 const struct net_device *out,
62 int (*okfn)(struct sk_buff *)) 62 int (*okfn)(struct sk_buff *))
63{ 63{
64 return arpt_do_table(skb, hook, in, out, &packet_filter); 64 return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter);
65} 65}
66 66
67static struct nf_hook_ops arpt_ops[] = { 67static struct nf_hook_ops arpt_ops[] __read_mostly = {
68 { 68 {
69 .hook = arpt_hook, 69 .hook = arpt_hook,
70 .owner = THIS_MODULE, 70 .owner = THIS_MODULE,
@@ -85,12 +85,31 @@ static struct nf_hook_ops arpt_ops[] = {
85 }, 85 },
86}; 86};
87 87
88static int __net_init arptable_filter_net_init(struct net *net)
89{
90 /* Register table */
91 net->ipv4.arptable_filter =
92 arpt_register_table(net, &packet_filter, &initial_table.repl);
93 if (IS_ERR(net->ipv4.arptable_filter))
94 return PTR_ERR(net->ipv4.arptable_filter);
95 return 0;
96}
97
98static void __net_exit arptable_filter_net_exit(struct net *net)
99{
100 arpt_unregister_table(net->ipv4.arptable_filter);
101}
102
103static struct pernet_operations arptable_filter_net_ops = {
104 .init = arptable_filter_net_init,
105 .exit = arptable_filter_net_exit,
106};
107
88static int __init arptable_filter_init(void) 108static int __init arptable_filter_init(void)
89{ 109{
90 int ret; 110 int ret;
91 111
92 /* Register table */ 112 ret = register_pernet_subsys(&arptable_filter_net_ops);
93 ret = arpt_register_table(&packet_filter, &initial_table.repl);
94 if (ret < 0) 113 if (ret < 0)
95 return ret; 114 return ret;
96 115
@@ -100,14 +119,14 @@ static int __init arptable_filter_init(void)
100 return ret; 119 return ret;
101 120
102cleanup_table: 121cleanup_table:
103 arpt_unregister_table(&packet_filter); 122 unregister_pernet_subsys(&arptable_filter_net_ops);
104 return ret; 123 return ret;
105} 124}
106 125
107static void __exit arptable_filter_fini(void) 126static void __exit arptable_filter_fini(void)
108{ 127{
109 nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); 128 nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
110 arpt_unregister_table(&packet_filter); 129 unregister_pernet_subsys(&arptable_filter_net_ops);
111} 130}
112 131
113module_init(arptable_filter_init); 132module_init(arptable_filter_init);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 10a2ce09fd8e..6bda1102851b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -22,24 +22,21 @@
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/sysctl.h> 23#include <linux/sysctl.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/seq_file.h>
25#include <linux/security.h> 26#include <linux/security.h>
26#include <linux/mutex.h> 27#include <linux/mutex.h>
27#include <net/net_namespace.h> 28#include <net/net_namespace.h>
28#include <net/sock.h> 29#include <net/sock.h>
29#include <net/route.h> 30#include <net/route.h>
31#include <net/netfilter/nf_queue.h>
32#include <net/ip.h>
30 33
31#define IPQ_QMAX_DEFAULT 1024 34#define IPQ_QMAX_DEFAULT 1024
32#define IPQ_PROC_FS_NAME "ip_queue" 35#define IPQ_PROC_FS_NAME "ip_queue"
33#define NET_IPQ_QMAX 2088 36#define NET_IPQ_QMAX 2088
34#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" 37#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
35 38
36struct ipq_queue_entry { 39typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
37 struct list_head list;
38 struct nf_info *info;
39 struct sk_buff *skb;
40};
41
42typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
43 40
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 41static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 42static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -53,76 +50,13 @@ static struct sock *ipqnl __read_mostly;
53static LIST_HEAD(queue_list); 50static LIST_HEAD(queue_list);
54static DEFINE_MUTEX(ipqnl_mutex); 51static DEFINE_MUTEX(ipqnl_mutex);
55 52
56static void
57ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
58{
59 /* TCP input path (and probably other bits) assume to be called
60 * from softirq context, not from syscall, like ipq_issue_verdict is
61 * called. TCP input path deadlocks with locks taken from timer
62 * softirq, e.g. We therefore emulate this by local_bh_disable() */
63
64 local_bh_disable();
65 nf_reinject(entry->skb, entry->info, verdict);
66 local_bh_enable();
67
68 kfree(entry);
69}
70
71static inline void 53static inline void
72__ipq_enqueue_entry(struct ipq_queue_entry *entry) 54__ipq_enqueue_entry(struct nf_queue_entry *entry)
73{ 55{
74 list_add(&entry->list, &queue_list); 56 list_add_tail(&entry->list, &queue_list);
75 queue_total++; 57 queue_total++;
76} 58}
77 59
78/*
79 * Find and return a queued entry matched by cmpfn, or return the last
80 * entry if cmpfn is NULL.
81 */
82static inline struct ipq_queue_entry *
83__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
84{
85 struct list_head *p;
86
87 list_for_each_prev(p, &queue_list) {
88 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
89
90 if (!cmpfn || cmpfn(entry, data))
91 return entry;
92 }
93 return NULL;
94}
95
96static inline void
97__ipq_dequeue_entry(struct ipq_queue_entry *entry)
98{
99 list_del(&entry->list);
100 queue_total--;
101}
102
103static inline struct ipq_queue_entry *
104__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
105{
106 struct ipq_queue_entry *entry;
107
108 entry = __ipq_find_entry(cmpfn, data);
109 if (entry == NULL)
110 return NULL;
111
112 __ipq_dequeue_entry(entry);
113 return entry;
114}
115
116
117static inline void
118__ipq_flush(int verdict)
119{
120 struct ipq_queue_entry *entry;
121
122 while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
123 ipq_issue_verdict(entry, verdict);
124}
125
126static inline int 60static inline int
127__ipq_set_mode(unsigned char mode, unsigned int range) 61__ipq_set_mode(unsigned char mode, unsigned int range)
128{ 62{
@@ -149,36 +83,64 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
149 return status; 83 return status;
150} 84}
151 85
86static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
87
152static inline void 88static inline void
153__ipq_reset(void) 89__ipq_reset(void)
154{ 90{
155 peer_pid = 0; 91 peer_pid = 0;
156 net_disable_timestamp(); 92 net_disable_timestamp();
157 __ipq_set_mode(IPQ_COPY_NONE, 0); 93 __ipq_set_mode(IPQ_COPY_NONE, 0);
158 __ipq_flush(NF_DROP); 94 __ipq_flush(NULL, 0);
159} 95}
160 96
161static struct ipq_queue_entry * 97static struct nf_queue_entry *
162ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) 98ipq_find_dequeue_entry(unsigned long id)
163{ 99{
164 struct ipq_queue_entry *entry; 100 struct nf_queue_entry *entry = NULL, *i;
165 101
166 write_lock_bh(&queue_lock); 102 write_lock_bh(&queue_lock);
167 entry = __ipq_find_dequeue_entry(cmpfn, data); 103
104 list_for_each_entry(i, &queue_list, list) {
105 if ((unsigned long)i == id) {
106 entry = i;
107 break;
108 }
109 }
110
111 if (entry) {
112 list_del(&entry->list);
113 queue_total--;
114 }
115
168 write_unlock_bh(&queue_lock); 116 write_unlock_bh(&queue_lock);
169 return entry; 117 return entry;
170} 118}
171 119
172static void 120static void
173ipq_flush(int verdict) 121__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
122{
123 struct nf_queue_entry *entry, *next;
124
125 list_for_each_entry_safe(entry, next, &queue_list, list) {
126 if (!cmpfn || cmpfn(entry, data)) {
127 list_del(&entry->list);
128 queue_total--;
129 nf_reinject(entry, NF_DROP);
130 }
131 }
132}
133
134static void
135ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
174{ 136{
175 write_lock_bh(&queue_lock); 137 write_lock_bh(&queue_lock);
176 __ipq_flush(verdict); 138 __ipq_flush(cmpfn, data);
177 write_unlock_bh(&queue_lock); 139 write_unlock_bh(&queue_lock);
178} 140}
179 141
180static struct sk_buff * 142static struct sk_buff *
181ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) 143ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
182{ 144{
183 sk_buff_data_t old_tail; 145 sk_buff_data_t old_tail;
184 size_t size = 0; 146 size_t size = 0;
@@ -235,20 +197,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
235 pmsg->timestamp_sec = tv.tv_sec; 197 pmsg->timestamp_sec = tv.tv_sec;
236 pmsg->timestamp_usec = tv.tv_usec; 198 pmsg->timestamp_usec = tv.tv_usec;
237 pmsg->mark = entry->skb->mark; 199 pmsg->mark = entry->skb->mark;
238 pmsg->hook = entry->info->hook; 200 pmsg->hook = entry->hook;
239 pmsg->hw_protocol = entry->skb->protocol; 201 pmsg->hw_protocol = entry->skb->protocol;
240 202
241 if (entry->info->indev) 203 if (entry->indev)
242 strcpy(pmsg->indev_name, entry->info->indev->name); 204 strcpy(pmsg->indev_name, entry->indev->name);
243 else 205 else
244 pmsg->indev_name[0] = '\0'; 206 pmsg->indev_name[0] = '\0';
245 207
246 if (entry->info->outdev) 208 if (entry->outdev)
247 strcpy(pmsg->outdev_name, entry->info->outdev->name); 209 strcpy(pmsg->outdev_name, entry->outdev->name);
248 else 210 else
249 pmsg->outdev_name[0] = '\0'; 211 pmsg->outdev_name[0] = '\0';
250 212
251 if (entry->info->indev && entry->skb->dev) { 213 if (entry->indev && entry->skb->dev) {
252 pmsg->hw_type = entry->skb->dev->type; 214 pmsg->hw_type = entry->skb->dev->type;
253 pmsg->hw_addrlen = dev_parse_header(entry->skb, 215 pmsg->hw_addrlen = dev_parse_header(entry->skb,
254 pmsg->hw_addr); 216 pmsg->hw_addr);
@@ -270,28 +232,17 @@ nlmsg_failure:
270} 232}
271 233
272static int 234static int
273ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 235ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
274 unsigned int queuenum, void *data)
275{ 236{
276 int status = -EINVAL; 237 int status = -EINVAL;
277 struct sk_buff *nskb; 238 struct sk_buff *nskb;
278 struct ipq_queue_entry *entry;
279 239
280 if (copy_mode == IPQ_COPY_NONE) 240 if (copy_mode == IPQ_COPY_NONE)
281 return -EAGAIN; 241 return -EAGAIN;
282 242
283 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
284 if (entry == NULL) {
285 printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
286 return -ENOMEM;
287 }
288
289 entry->info = info;
290 entry->skb = skb;
291
292 nskb = ipq_build_packet_message(entry, &status); 243 nskb = ipq_build_packet_message(entry, &status);
293 if (nskb == NULL) 244 if (nskb == NULL)
294 goto err_out_free; 245 return status;
295 246
296 write_lock_bh(&queue_lock); 247 write_lock_bh(&queue_lock);
297 248
@@ -325,14 +276,11 @@ err_out_free_nskb:
325 276
326err_out_unlock: 277err_out_unlock:
327 write_unlock_bh(&queue_lock); 278 write_unlock_bh(&queue_lock);
328
329err_out_free:
330 kfree(entry);
331 return status; 279 return status;
332} 280}
333 281
334static int 282static int
335ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) 283ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
336{ 284{
337 int diff; 285 int diff;
338 int err; 286 int err;
@@ -367,21 +315,15 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
367 return 0; 315 return 0;
368} 316}
369 317
370static inline int
371id_cmp(struct ipq_queue_entry *e, unsigned long id)
372{
373 return (id == (unsigned long )e);
374}
375
376static int 318static int
377ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 319ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
378{ 320{
379 struct ipq_queue_entry *entry; 321 struct nf_queue_entry *entry;
380 322
381 if (vmsg->value > NF_MAX_VERDICT) 323 if (vmsg->value > NF_MAX_VERDICT)
382 return -EINVAL; 324 return -EINVAL;
383 325
384 entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); 326 entry = ipq_find_dequeue_entry(vmsg->id);
385 if (entry == NULL) 327 if (entry == NULL)
386 return -ENOENT; 328 return -ENOENT;
387 else { 329 else {
@@ -391,7 +333,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
391 if (ipq_mangle_ipv4(vmsg, entry) < 0) 333 if (ipq_mangle_ipv4(vmsg, entry) < 0)
392 verdict = NF_DROP; 334 verdict = NF_DROP;
393 335
394 ipq_issue_verdict(entry, verdict); 336 nf_reinject(entry, verdict);
395 return 0; 337 return 0;
396 } 338 }
397} 339}
@@ -436,13 +378,13 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
436} 378}
437 379
438static int 380static int
439dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) 381dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
440{ 382{
441 if (entry->info->indev) 383 if (entry->indev)
442 if (entry->info->indev->ifindex == ifindex) 384 if (entry->indev->ifindex == ifindex)
443 return 1; 385 return 1;
444 if (entry->info->outdev) 386 if (entry->outdev)
445 if (entry->info->outdev->ifindex == ifindex) 387 if (entry->outdev->ifindex == ifindex)
446 return 1; 388 return 1;
447#ifdef CONFIG_BRIDGE_NETFILTER 389#ifdef CONFIG_BRIDGE_NETFILTER
448 if (entry->skb->nf_bridge) { 390 if (entry->skb->nf_bridge) {
@@ -460,10 +402,7 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
460static void 402static void
461ipq_dev_drop(int ifindex) 403ipq_dev_drop(int ifindex)
462{ 404{
463 struct ipq_queue_entry *entry; 405 ipq_flush(dev_cmp, ifindex);
464
465 while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
466 ipq_issue_verdict(entry, NF_DROP);
467} 406}
468 407
469#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 408#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -573,6 +512,7 @@ static struct notifier_block ipq_nl_notifier = {
573 .notifier_call = ipq_rcv_nl_event, 512 .notifier_call = ipq_rcv_nl_event,
574}; 513};
575 514
515#ifdef CONFIG_SYSCTL
576static struct ctl_table_header *ipq_sysctl_header; 516static struct ctl_table_header *ipq_sysctl_header;
577 517
578static ctl_table ipq_table[] = { 518static ctl_table ipq_table[] = {
@@ -586,36 +526,14 @@ static ctl_table ipq_table[] = {
586 }, 526 },
587 { .ctl_name = 0 } 527 { .ctl_name = 0 }
588}; 528};
589 529#endif
590static ctl_table ipq_dir_table[] = {
591 {
592 .ctl_name = NET_IPV4,
593 .procname = "ipv4",
594 .mode = 0555,
595 .child = ipq_table
596 },
597 { .ctl_name = 0 }
598};
599
600static ctl_table ipq_root_table[] = {
601 {
602 .ctl_name = CTL_NET,
603 .procname = "net",
604 .mode = 0555,
605 .child = ipq_dir_table
606 },
607 { .ctl_name = 0 }
608};
609 530
610#ifdef CONFIG_PROC_FS 531#ifdef CONFIG_PROC_FS
611static int 532static int ip_queue_show(struct seq_file *m, void *v)
612ipq_get_info(char *buffer, char **start, off_t offset, int length)
613{ 533{
614 int len;
615
616 read_lock_bh(&queue_lock); 534 read_lock_bh(&queue_lock);
617 535
618 len = sprintf(buffer, 536 seq_printf(m,
619 "Peer PID : %d\n" 537 "Peer PID : %d\n"
620 "Copy mode : %hu\n" 538 "Copy mode : %hu\n"
621 "Copy range : %u\n" 539 "Copy range : %u\n"
@@ -632,18 +550,24 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
632 queue_user_dropped); 550 queue_user_dropped);
633 551
634 read_unlock_bh(&queue_lock); 552 read_unlock_bh(&queue_lock);
553 return 0;
554}
635 555
636 *start = buffer + offset; 556static int ip_queue_open(struct inode *inode, struct file *file)
637 len -= offset; 557{
638 if (len > length) 558 return single_open(file, ip_queue_show, NULL);
639 len = length;
640 else if (len < 0)
641 len = 0;
642 return len;
643} 559}
644#endif /* CONFIG_PROC_FS */
645 560
646static struct nf_queue_handler nfqh = { 561static const struct file_operations ip_queue_proc_fops = {
562 .open = ip_queue_open,
563 .read = seq_read,
564 .llseek = seq_lseek,
565 .release = single_release,
566 .owner = THIS_MODULE,
567};
568#endif
569
570static const struct nf_queue_handler nfqh = {
647 .name = "ip_queue", 571 .name = "ip_queue",
648 .outfn = &ipq_enqueue_packet, 572 .outfn = &ipq_enqueue_packet,
649}; 573};
@@ -651,7 +575,7 @@ static struct nf_queue_handler nfqh = {
651static int __init ip_queue_init(void) 575static int __init ip_queue_init(void)
652{ 576{
653 int status = -ENOMEM; 577 int status = -ENOMEM;
654 struct proc_dir_entry *proc; 578 struct proc_dir_entry *proc __maybe_unused;
655 579
656 netlink_register_notifier(&ipq_nl_notifier); 580 netlink_register_notifier(&ipq_nl_notifier);
657 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, 581 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
@@ -661,17 +585,20 @@ static int __init ip_queue_init(void)
661 goto cleanup_netlink_notifier; 585 goto cleanup_netlink_notifier;
662 } 586 }
663 587
664 proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); 588#ifdef CONFIG_PROC_FS
665 if (proc) 589 proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
590 if (proc) {
666 proc->owner = THIS_MODULE; 591 proc->owner = THIS_MODULE;
667 else { 592 proc->proc_fops = &ip_queue_proc_fops;
593 } else {
668 printk(KERN_ERR "ip_queue: failed to create proc entry\n"); 594 printk(KERN_ERR "ip_queue: failed to create proc entry\n");
669 goto cleanup_ipqnl; 595 goto cleanup_ipqnl;
670 } 596 }
671 597#endif
672 register_netdevice_notifier(&ipq_dev_notifier); 598 register_netdevice_notifier(&ipq_dev_notifier);
673 ipq_sysctl_header = register_sysctl_table(ipq_root_table); 599#ifdef CONFIG_SYSCTL
674 600 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
601#endif
675 status = nf_register_queue_handler(PF_INET, &nfqh); 602 status = nf_register_queue_handler(PF_INET, &nfqh);
676 if (status < 0) { 603 if (status < 0) {
677 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 604 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
@@ -680,11 +607,13 @@ static int __init ip_queue_init(void)
680 return status; 607 return status;
681 608
682cleanup_sysctl: 609cleanup_sysctl:
610#ifdef CONFIG_SYSCTL
683 unregister_sysctl_table(ipq_sysctl_header); 611 unregister_sysctl_table(ipq_sysctl_header);
612#endif
684 unregister_netdevice_notifier(&ipq_dev_notifier); 613 unregister_netdevice_notifier(&ipq_dev_notifier);
685 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 614 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
686cleanup_ipqnl: 615cleanup_ipqnl: __maybe_unused
687 sock_release(ipqnl->sk_socket); 616 netlink_kernel_release(ipqnl);
688 mutex_lock(&ipqnl_mutex); 617 mutex_lock(&ipqnl_mutex);
689 mutex_unlock(&ipqnl_mutex); 618 mutex_unlock(&ipqnl_mutex);
690 619
@@ -697,13 +626,15 @@ static void __exit ip_queue_fini(void)
697{ 626{
698 nf_unregister_queue_handlers(&nfqh); 627 nf_unregister_queue_handlers(&nfqh);
699 synchronize_net(); 628 synchronize_net();
700 ipq_flush(NF_DROP); 629 ipq_flush(NULL, 0);
701 630
631#ifdef CONFIG_SYSCTL
702 unregister_sysctl_table(ipq_sysctl_header); 632 unregister_sysctl_table(ipq_sysctl_header);
633#endif
703 unregister_netdevice_notifier(&ipq_dev_notifier); 634 unregister_netdevice_notifier(&ipq_dev_notifier);
704 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 635 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
705 636
706 sock_release(ipqnl->sk_socket); 637 netlink_kernel_release(ipqnl);
707 mutex_lock(&ipqnl_mutex); 638 mutex_lock(&ipqnl_mutex);
708 mutex_unlock(&ipqnl_mutex); 639 mutex_unlock(&ipqnl_mutex);
709 640
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4b10b98640ac..600737f122d2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/netfilter/x_tables.h> 27#include <linux/netfilter/x_tables.h>
28#include <linux/netfilter_ipv4/ip_tables.h> 28#include <linux/netfilter_ipv4/ip_tables.h>
29#include <net/netfilter/nf_log.h>
29 30
30MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 32MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -74,7 +75,8 @@ do { \
74 Hence the start of any table is given by get_table() below. */ 75 Hence the start of any table is given by get_table() below. */
75 76
76/* Returns whether matches rule or not. */ 77/* Returns whether matches rule or not. */
77static inline int 78/* Performance critical - called for every packet */
79static inline bool
78ip_packet_match(const struct iphdr *ip, 80ip_packet_match(const struct iphdr *ip,
79 const char *indev, 81 const char *indev,
80 const char *outdev, 82 const char *outdev,
@@ -84,7 +86,7 @@ ip_packet_match(const struct iphdr *ip,
84 size_t i; 86 size_t i;
85 unsigned long ret; 87 unsigned long ret;
86 88
87#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) 89#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
88 90
89 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, 91 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
90 IPT_INV_SRCIP) 92 IPT_INV_SRCIP)
@@ -102,7 +104,7 @@ ip_packet_match(const struct iphdr *ip,
102 NIPQUAD(ipinfo->dmsk.s_addr), 104 NIPQUAD(ipinfo->dmsk.s_addr),
103 NIPQUAD(ipinfo->dst.s_addr), 105 NIPQUAD(ipinfo->dst.s_addr),
104 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); 106 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
105 return 0; 107 return false;
106 } 108 }
107 109
108 /* Look for ifname matches; this should unroll nicely. */ 110 /* Look for ifname matches; this should unroll nicely. */
@@ -116,7 +118,7 @@ ip_packet_match(const struct iphdr *ip,
116 dprintf("VIA in mismatch (%s vs %s).%s\n", 118 dprintf("VIA in mismatch (%s vs %s).%s\n",
117 indev, ipinfo->iniface, 119 indev, ipinfo->iniface,
118 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); 120 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
119 return 0; 121 return false;
120 } 122 }
121 123
122 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { 124 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
@@ -129,7 +131,7 @@ ip_packet_match(const struct iphdr *ip,
129 dprintf("VIA out mismatch (%s vs %s).%s\n", 131 dprintf("VIA out mismatch (%s vs %s).%s\n",
130 outdev, ipinfo->outiface, 132 outdev, ipinfo->outiface,
131 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); 133 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
132 return 0; 134 return false;
133 } 135 }
134 136
135 /* Check specific protocol */ 137 /* Check specific protocol */
@@ -138,7 +140,7 @@ ip_packet_match(const struct iphdr *ip,
138 dprintf("Packet protocol %hi does not match %hi.%s\n", 140 dprintf("Packet protocol %hi does not match %hi.%s\n",
139 ip->protocol, ipinfo->proto, 141 ip->protocol, ipinfo->proto,
140 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); 142 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
141 return 0; 143 return false;
142 } 144 }
143 145
144 /* If we have a fragment rule but the packet is not a fragment 146 /* If we have a fragment rule but the packet is not a fragment
@@ -146,13 +148,13 @@ ip_packet_match(const struct iphdr *ip,
146 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { 148 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
147 dprintf("Fragment rule but not fragment.%s\n", 149 dprintf("Fragment rule but not fragment.%s\n",
148 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); 150 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
149 return 0; 151 return false;
150 } 152 }
151 153
152 return 1; 154 return true;
153} 155}
154 156
155static inline bool 157static bool
156ip_checkentry(const struct ipt_ip *ip) 158ip_checkentry(const struct ipt_ip *ip)
157{ 159{
158 if (ip->flags & ~IPT_F_MASK) { 160 if (ip->flags & ~IPT_F_MASK) {
@@ -182,8 +184,9 @@ ipt_error(struct sk_buff *skb,
182 return NF_DROP; 184 return NF_DROP;
183} 185}
184 186
185static inline 187/* Performance critical - called for every packet */
186bool do_match(struct ipt_entry_match *m, 188static inline bool
189do_match(struct ipt_entry_match *m,
187 const struct sk_buff *skb, 190 const struct sk_buff *skb,
188 const struct net_device *in, 191 const struct net_device *in,
189 const struct net_device *out, 192 const struct net_device *out,
@@ -198,6 +201,7 @@ bool do_match(struct ipt_entry_match *m,
198 return false; 201 return false;
199} 202}
200 203
204/* Performance critical */
201static inline struct ipt_entry * 205static inline struct ipt_entry *
202get_entry(void *base, unsigned int offset) 206get_entry(void *base, unsigned int offset)
203{ 207{
@@ -205,6 +209,7 @@ get_entry(void *base, unsigned int offset)
205} 209}
206 210
207/* All zeroes == unconditional rule. */ 211/* All zeroes == unconditional rule. */
212/* Mildly perf critical (only if packet tracing is on) */
208static inline int 213static inline int
209unconditional(const struct ipt_ip *ip) 214unconditional(const struct ipt_ip *ip)
210{ 215{
@@ -215,16 +220,17 @@ unconditional(const struct ipt_ip *ip)
215 return 0; 220 return 0;
216 221
217 return 1; 222 return 1;
223#undef FWINV
218} 224}
219 225
220#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 226#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
221 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 227 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
222static const char *hooknames[] = { 228static const char *const hooknames[] = {
223 [NF_IP_PRE_ROUTING] = "PREROUTING", 229 [NF_INET_PRE_ROUTING] = "PREROUTING",
224 [NF_IP_LOCAL_IN] = "INPUT", 230 [NF_INET_LOCAL_IN] = "INPUT",
225 [NF_IP_FORWARD] = "FORWARD", 231 [NF_INET_FORWARD] = "FORWARD",
226 [NF_IP_LOCAL_OUT] = "OUTPUT", 232 [NF_INET_LOCAL_OUT] = "OUTPUT",
227 [NF_IP_POST_ROUTING] = "POSTROUTING", 233 [NF_INET_POST_ROUTING] = "POSTROUTING",
228}; 234};
229 235
230enum nf_ip_trace_comments { 236enum nf_ip_trace_comments {
@@ -233,7 +239,7 @@ enum nf_ip_trace_comments {
233 NF_IP_TRACE_COMMENT_POLICY, 239 NF_IP_TRACE_COMMENT_POLICY,
234}; 240};
235 241
236static const char *comments[] = { 242static const char *const comments[] = {
237 [NF_IP_TRACE_COMMENT_RULE] = "rule", 243 [NF_IP_TRACE_COMMENT_RULE] = "rule",
238 [NF_IP_TRACE_COMMENT_RETURN] = "return", 244 [NF_IP_TRACE_COMMENT_RETURN] = "return",
239 [NF_IP_TRACE_COMMENT_POLICY] = "policy", 245 [NF_IP_TRACE_COMMENT_POLICY] = "policy",
@@ -249,6 +255,7 @@ static struct nf_loginfo trace_loginfo = {
249 }, 255 },
250}; 256};
251 257
258/* Mildly perf critical (only if packet tracing is on) */
252static inline int 259static inline int
253get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, 260get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
254 char *hookname, char **chainname, 261 char *hookname, char **chainname,
@@ -284,7 +291,7 @@ static void trace_packet(struct sk_buff *skb,
284 unsigned int hook, 291 unsigned int hook,
285 const struct net_device *in, 292 const struct net_device *in,
286 const struct net_device *out, 293 const struct net_device *out,
287 char *tablename, 294 const char *tablename,
288 struct xt_table_info *private, 295 struct xt_table_info *private,
289 struct ipt_entry *e) 296 struct ipt_entry *e)
290{ 297{
@@ -465,10 +472,9 @@ mark_source_chains(struct xt_table_info *newinfo,
465 472
466 /* No recursion; use packet counter to save back ptrs (reset 473 /* No recursion; use packet counter to save back ptrs (reset
467 to 0 as we leave), and comefrom to save source hook bitmask */ 474 to 0 as we leave), and comefrom to save source hook bitmask */
468 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { 475 for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
469 unsigned int pos = newinfo->hook_entry[hook]; 476 unsigned int pos = newinfo->hook_entry[hook];
470 struct ipt_entry *e 477 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
471 = (struct ipt_entry *)(entry0 + pos);
472 478
473 if (!(valid_hooks & (1 << hook))) 479 if (!(valid_hooks & (1 << hook)))
474 continue; 480 continue;
@@ -481,13 +487,12 @@ mark_source_chains(struct xt_table_info *newinfo,
481 = (void *)ipt_get_target(e); 487 = (void *)ipt_get_target(e);
482 int visited = e->comefrom & (1 << hook); 488 int visited = e->comefrom & (1 << hook);
483 489
484 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { 490 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
485 printk("iptables: loop hook %u pos %u %08X.\n", 491 printk("iptables: loop hook %u pos %u %08X.\n",
486 hook, pos, e->comefrom); 492 hook, pos, e->comefrom);
487 return 0; 493 return 0;
488 } 494 }
489 e->comefrom 495 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
490 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
491 496
492 /* Unconditional return/END. */ 497 /* Unconditional return/END. */
493 if ((e->target_offset == sizeof(struct ipt_entry) 498 if ((e->target_offset == sizeof(struct ipt_entry)
@@ -507,10 +512,10 @@ mark_source_chains(struct xt_table_info *newinfo,
507 /* Return: backtrack through the last 512 /* Return: backtrack through the last
508 big jump. */ 513 big jump. */
509 do { 514 do {
510 e->comefrom ^= (1<<NF_IP_NUMHOOKS); 515 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
511#ifdef DEBUG_IP_FIREWALL_USER 516#ifdef DEBUG_IP_FIREWALL_USER
512 if (e->comefrom 517 if (e->comefrom
513 & (1 << NF_IP_NUMHOOKS)) { 518 & (1 << NF_INET_NUMHOOKS)) {
514 duprintf("Back unset " 519 duprintf("Back unset "
515 "on hook %u " 520 "on hook %u "
516 "rule %u\n", 521 "rule %u\n",
@@ -567,7 +572,7 @@ mark_source_chains(struct xt_table_info *newinfo,
567 return 1; 572 return 1;
568} 573}
569 574
570static inline int 575static int
571cleanup_match(struct ipt_entry_match *m, unsigned int *i) 576cleanup_match(struct ipt_entry_match *m, unsigned int *i)
572{ 577{
573 if (i && (*i)-- == 0) 578 if (i && (*i)-- == 0)
@@ -579,7 +584,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
579 return 0; 584 return 0;
580} 585}
581 586
582static inline int 587static int
583check_entry(struct ipt_entry *e, const char *name) 588check_entry(struct ipt_entry *e, const char *name)
584{ 589{
585 struct ipt_entry_target *t; 590 struct ipt_entry_target *t;
@@ -589,7 +594,8 @@ check_entry(struct ipt_entry *e, const char *name)
589 return -EINVAL; 594 return -EINVAL;
590 } 595 }
591 596
592 if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) 597 if (e->target_offset + sizeof(struct ipt_entry_target) >
598 e->next_offset)
593 return -EINVAL; 599 return -EINVAL;
594 600
595 t = ipt_get_target(e); 601 t = ipt_get_target(e);
@@ -599,9 +605,10 @@ check_entry(struct ipt_entry *e, const char *name)
599 return 0; 605 return 0;
600} 606}
601 607
602static inline int check_match(struct ipt_entry_match *m, const char *name, 608static int
603 const struct ipt_ip *ip, unsigned int hookmask, 609check_match(struct ipt_entry_match *m, const char *name,
604 unsigned int *i) 610 const struct ipt_ip *ip,
611 unsigned int hookmask, unsigned int *i)
605{ 612{
606 struct xt_match *match; 613 struct xt_match *match;
607 int ret; 614 int ret;
@@ -622,18 +629,18 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
622 return ret; 629 return ret;
623} 630}
624 631
625static inline int 632static int
626find_check_match(struct ipt_entry_match *m, 633find_check_match(struct ipt_entry_match *m,
627 const char *name, 634 const char *name,
628 const struct ipt_ip *ip, 635 const struct ipt_ip *ip,
629 unsigned int hookmask, 636 unsigned int hookmask,
630 unsigned int *i) 637 unsigned int *i)
631{ 638{
632 struct xt_match *match; 639 struct xt_match *match;
633 int ret; 640 int ret;
634 641
635 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 642 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
636 m->u.user.revision), 643 m->u.user.revision),
637 "ipt_%s", m->u.user.name); 644 "ipt_%s", m->u.user.name);
638 if (IS_ERR(match) || !match) { 645 if (IS_ERR(match) || !match) {
639 duprintf("find_check_match: `%s' not found\n", m->u.user.name); 646 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
@@ -651,7 +658,7 @@ err:
651 return ret; 658 return ret;
652} 659}
653 660
654static inline int check_target(struct ipt_entry *e, const char *name) 661static int check_target(struct ipt_entry *e, const char *name)
655{ 662{
656 struct ipt_entry_target *t; 663 struct ipt_entry_target *t;
657 struct xt_target *target; 664 struct xt_target *target;
@@ -663,8 +670,8 @@ static inline int check_target(struct ipt_entry *e, const char *name)
663 name, e->comefrom, e->ip.proto, 670 name, e->comefrom, e->ip.proto,
664 e->ip.invflags & IPT_INV_PROTO); 671 e->ip.invflags & IPT_INV_PROTO);
665 if (!ret && t->u.kernel.target->checkentry 672 if (!ret && t->u.kernel.target->checkentry
666 && !t->u.kernel.target->checkentry(name, e, target, 673 && !t->u.kernel.target->checkentry(name, e, target, t->data,
667 t->data, e->comefrom)) { 674 e->comefrom)) {
668 duprintf("ip_tables: check failed for `%s'.\n", 675 duprintf("ip_tables: check failed for `%s'.\n",
669 t->u.kernel.target->name); 676 t->u.kernel.target->name);
670 ret = -EINVAL; 677 ret = -EINVAL;
@@ -672,9 +679,9 @@ static inline int check_target(struct ipt_entry *e, const char *name)
672 return ret; 679 return ret;
673} 680}
674 681
675static inline int 682static int
676find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, 683find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
677 unsigned int *i) 684 unsigned int *i)
678{ 685{
679 struct ipt_entry_target *t; 686 struct ipt_entry_target *t;
680 struct xt_target *target; 687 struct xt_target *target;
@@ -687,14 +694,14 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
687 694
688 j = 0; 695 j = 0;
689 ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, 696 ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
690 e->comefrom, &j); 697 e->comefrom, &j);
691 if (ret != 0) 698 if (ret != 0)
692 goto cleanup_matches; 699 goto cleanup_matches;
693 700
694 t = ipt_get_target(e); 701 t = ipt_get_target(e);
695 target = try_then_request_module(xt_find_target(AF_INET, 702 target = try_then_request_module(xt_find_target(AF_INET,
696 t->u.user.name, 703 t->u.user.name,
697 t->u.user.revision), 704 t->u.user.revision),
698 "ipt_%s", t->u.user.name); 705 "ipt_%s", t->u.user.name);
699 if (IS_ERR(target) || !target) { 706 if (IS_ERR(target) || !target) {
700 duprintf("find_check_entry: `%s' not found\n", t->u.user.name); 707 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
@@ -716,7 +723,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
716 return ret; 723 return ret;
717} 724}
718 725
719static inline int 726static int
720check_entry_size_and_hooks(struct ipt_entry *e, 727check_entry_size_and_hooks(struct ipt_entry *e,
721 struct xt_table_info *newinfo, 728 struct xt_table_info *newinfo,
722 unsigned char *base, 729 unsigned char *base,
@@ -741,7 +748,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
741 } 748 }
742 749
743 /* Check hooks & underflows */ 750 /* Check hooks & underflows */
744 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 751 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
745 if ((unsigned char *)e - base == hook_entries[h]) 752 if ((unsigned char *)e - base == hook_entries[h])
746 newinfo->hook_entry[h] = hook_entries[h]; 753 newinfo->hook_entry[h] = hook_entries[h];
747 if ((unsigned char *)e - base == underflows[h]) 754 if ((unsigned char *)e - base == underflows[h])
@@ -759,7 +766,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
759 return 0; 766 return 0;
760} 767}
761 768
762static inline int 769static int
763cleanup_entry(struct ipt_entry *e, unsigned int *i) 770cleanup_entry(struct ipt_entry *e, unsigned int *i)
764{ 771{
765 struct ipt_entry_target *t; 772 struct ipt_entry_target *t;
@@ -795,7 +802,7 @@ translate_table(const char *name,
795 newinfo->number = number; 802 newinfo->number = number;
796 803
797 /* Init all hooks to impossible value. */ 804 /* Init all hooks to impossible value. */
798 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 805 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
799 newinfo->hook_entry[i] = 0xFFFFFFFF; 806 newinfo->hook_entry[i] = 0xFFFFFFFF;
800 newinfo->underflow[i] = 0xFFFFFFFF; 807 newinfo->underflow[i] = 0xFFFFFFFF;
801 } 808 }
@@ -819,7 +826,7 @@ translate_table(const char *name,
819 } 826 }
820 827
821 /* Check hooks all assigned */ 828 /* Check hooks all assigned */
822 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 829 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
823 /* Only hooks which are valid */ 830 /* Only hooks which are valid */
824 if (!(valid_hooks & (1 << i))) 831 if (!(valid_hooks & (1 << i)))
825 continue; 832 continue;
@@ -915,7 +922,7 @@ get_counters(const struct xt_table_info *t,
915 } 922 }
916} 923}
917 924
918static inline struct xt_counters * alloc_counters(struct xt_table *table) 925static struct xt_counters * alloc_counters(struct xt_table *table)
919{ 926{
920 unsigned int countersize; 927 unsigned int countersize;
921 struct xt_counters *counters; 928 struct xt_counters *counters;
@@ -959,7 +966,6 @@ copy_entries_to_user(unsigned int total_size,
959 * allowed to migrate to another cpu) 966 * allowed to migrate to another cpu)
960 */ 967 */
961 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 968 loc_cpu_entry = private->entries[raw_smp_processor_id()];
962 /* ... then copy entire thing ... */
963 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 969 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
964 ret = -EFAULT; 970 ret = -EFAULT;
965 goto free_counters; 971 goto free_counters;
@@ -1014,63 +1020,12 @@ copy_entries_to_user(unsigned int total_size,
1014} 1020}
1015 1021
1016#ifdef CONFIG_COMPAT 1022#ifdef CONFIG_COMPAT
1017struct compat_delta {
1018 struct compat_delta *next;
1019 unsigned int offset;
1020 short delta;
1021};
1022
1023static struct compat_delta *compat_offsets = NULL;
1024
1025static int compat_add_offset(unsigned int offset, short delta)
1026{
1027 struct compat_delta *tmp;
1028
1029 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
1030 if (!tmp)
1031 return -ENOMEM;
1032 tmp->offset = offset;
1033 tmp->delta = delta;
1034 if (compat_offsets) {
1035 tmp->next = compat_offsets->next;
1036 compat_offsets->next = tmp;
1037 } else {
1038 compat_offsets = tmp;
1039 tmp->next = NULL;
1040 }
1041 return 0;
1042}
1043
1044static void compat_flush_offsets(void)
1045{
1046 struct compat_delta *tmp, *next;
1047
1048 if (compat_offsets) {
1049 for(tmp = compat_offsets; tmp; tmp = next) {
1050 next = tmp->next;
1051 kfree(tmp);
1052 }
1053 compat_offsets = NULL;
1054 }
1055}
1056
1057static short compat_calc_jump(unsigned int offset)
1058{
1059 struct compat_delta *tmp;
1060 short delta;
1061
1062 for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
1063 if (tmp->offset < offset)
1064 delta += tmp->delta;
1065 return delta;
1066}
1067
1068static void compat_standard_from_user(void *dst, void *src) 1023static void compat_standard_from_user(void *dst, void *src)
1069{ 1024{
1070 int v = *(compat_int_t *)src; 1025 int v = *(compat_int_t *)src;
1071 1026
1072 if (v > 0) 1027 if (v > 0)
1073 v += compat_calc_jump(v); 1028 v += xt_compat_calc_jump(AF_INET, v);
1074 memcpy(dst, &v, sizeof(v)); 1029 memcpy(dst, &v, sizeof(v));
1075} 1030}
1076 1031
@@ -1079,76 +1034,73 @@ static int compat_standard_to_user(void __user *dst, void *src)
1079 compat_int_t cv = *(int *)src; 1034 compat_int_t cv = *(int *)src;
1080 1035
1081 if (cv > 0) 1036 if (cv > 0)
1082 cv -= compat_calc_jump(cv); 1037 cv -= xt_compat_calc_jump(AF_INET, cv);
1083 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; 1038 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1084} 1039}
1085 1040
1086static inline int 1041static inline int
1087compat_calc_match(struct ipt_entry_match *m, int * size) 1042compat_calc_match(struct ipt_entry_match *m, int *size)
1088{ 1043{
1089 *size += xt_compat_match_offset(m->u.kernel.match); 1044 *size += xt_compat_match_offset(m->u.kernel.match);
1090 return 0; 1045 return 0;
1091} 1046}
1092 1047
1093static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, 1048static int compat_calc_entry(struct ipt_entry *e,
1094 void *base, struct xt_table_info *newinfo) 1049 const struct xt_table_info *info,
1050 void *base, struct xt_table_info *newinfo)
1095{ 1051{
1096 struct ipt_entry_target *t; 1052 struct ipt_entry_target *t;
1097 unsigned int entry_offset; 1053 unsigned int entry_offset;
1098 int off, i, ret; 1054 int off, i, ret;
1099 1055
1100 off = 0; 1056 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1101 entry_offset = (void *)e - base; 1057 entry_offset = (void *)e - base;
1102 IPT_MATCH_ITERATE(e, compat_calc_match, &off); 1058 IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1103 t = ipt_get_target(e); 1059 t = ipt_get_target(e);
1104 off += xt_compat_target_offset(t->u.kernel.target); 1060 off += xt_compat_target_offset(t->u.kernel.target);
1105 newinfo->size -= off; 1061 newinfo->size -= off;
1106 ret = compat_add_offset(entry_offset, off); 1062 ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1107 if (ret) 1063 if (ret)
1108 return ret; 1064 return ret;
1109 1065
1110 for (i = 0; i< NF_IP_NUMHOOKS; i++) { 1066 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1111 if (info->hook_entry[i] && (e < (struct ipt_entry *) 1067 if (info->hook_entry[i] &&
1112 (base + info->hook_entry[i]))) 1068 (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1113 newinfo->hook_entry[i] -= off; 1069 newinfo->hook_entry[i] -= off;
1114 if (info->underflow[i] && (e < (struct ipt_entry *) 1070 if (info->underflow[i] &&
1115 (base + info->underflow[i]))) 1071 (e < (struct ipt_entry *)(base + info->underflow[i])))
1116 newinfo->underflow[i] -= off; 1072 newinfo->underflow[i] -= off;
1117 } 1073 }
1118 return 0; 1074 return 0;
1119} 1075}
1120 1076
1121static int compat_table_info(struct xt_table_info *info, 1077static int compat_table_info(const struct xt_table_info *info,
1122 struct xt_table_info *newinfo) 1078 struct xt_table_info *newinfo)
1123{ 1079{
1124 void *loc_cpu_entry; 1080 void *loc_cpu_entry;
1125 int i;
1126 1081
1127 if (!newinfo || !info) 1082 if (!newinfo || !info)
1128 return -EINVAL; 1083 return -EINVAL;
1129 1084
1130 memset(newinfo, 0, sizeof(struct xt_table_info)); 1085 /* we dont care about newinfo->entries[] */
1131 newinfo->size = info->size; 1086 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1132 newinfo->number = info->number; 1087 newinfo->initial_entries = 0;
1133 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
1134 newinfo->hook_entry[i] = info->hook_entry[i];
1135 newinfo->underflow[i] = info->underflow[i];
1136 }
1137 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1088 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1138 return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, 1089 return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1139 compat_calc_entry, info, loc_cpu_entry, newinfo); 1090 compat_calc_entry, info, loc_cpu_entry,
1091 newinfo);
1140} 1092}
1141#endif 1093#endif
1142 1094
1143static int get_info(void __user *user, int *len, int compat) 1095static int get_info(struct net *net, void __user *user, int *len, int compat)
1144{ 1096{
1145 char name[IPT_TABLE_MAXNAMELEN]; 1097 char name[IPT_TABLE_MAXNAMELEN];
1146 struct xt_table *t; 1098 struct xt_table *t;
1147 int ret; 1099 int ret;
1148 1100
1149 if (*len != sizeof(struct ipt_getinfo)) { 1101 if (*len != sizeof(struct ipt_getinfo)) {
1150 duprintf("length %u != %u\n", *len, 1102 duprintf("length %u != %zu\n", *len,
1151 (unsigned int)sizeof(struct ipt_getinfo)); 1103 sizeof(struct ipt_getinfo));
1152 return -EINVAL; 1104 return -EINVAL;
1153 } 1105 }
1154 1106
@@ -1160,8 +1112,8 @@ static int get_info(void __user *user, int *len, int compat)
1160 if (compat) 1112 if (compat)
1161 xt_compat_lock(AF_INET); 1113 xt_compat_lock(AF_INET);
1162#endif 1114#endif
1163 t = try_then_request_module(xt_find_table_lock(AF_INET, name), 1115 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1164 "iptable_%s", name); 1116 "iptable_%s", name);
1165 if (t && !IS_ERR(t)) { 1117 if (t && !IS_ERR(t)) {
1166 struct ipt_getinfo info; 1118 struct ipt_getinfo info;
1167 struct xt_table_info *private = t->private; 1119 struct xt_table_info *private = t->private;
@@ -1170,15 +1122,15 @@ static int get_info(void __user *user, int *len, int compat)
1170 if (compat) { 1122 if (compat) {
1171 struct xt_table_info tmp; 1123 struct xt_table_info tmp;
1172 ret = compat_table_info(private, &tmp); 1124 ret = compat_table_info(private, &tmp);
1173 compat_flush_offsets(); 1125 xt_compat_flush_offsets(AF_INET);
1174 private = &tmp; 1126 private = &tmp;
1175 } 1127 }
1176#endif 1128#endif
1177 info.valid_hooks = t->valid_hooks; 1129 info.valid_hooks = t->valid_hooks;
1178 memcpy(info.hook_entry, private->hook_entry, 1130 memcpy(info.hook_entry, private->hook_entry,
1179 sizeof(info.hook_entry)); 1131 sizeof(info.hook_entry));
1180 memcpy(info.underflow, private->underflow, 1132 memcpy(info.underflow, private->underflow,
1181 sizeof(info.underflow)); 1133 sizeof(info.underflow));
1182 info.num_entries = private->number; 1134 info.num_entries = private->number;
1183 info.size = private->size; 1135 info.size = private->size;
1184 strcpy(info.name, name); 1136 strcpy(info.name, name);
@@ -1200,38 +1152,34 @@ static int get_info(void __user *user, int *len, int compat)
1200} 1152}
1201 1153
1202static int 1154static int
1203get_entries(struct ipt_get_entries __user *uptr, int *len) 1155get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1204{ 1156{
1205 int ret; 1157 int ret;
1206 struct ipt_get_entries get; 1158 struct ipt_get_entries get;
1207 struct xt_table *t; 1159 struct xt_table *t;
1208 1160
1209 if (*len < sizeof(get)) { 1161 if (*len < sizeof(get)) {
1210 duprintf("get_entries: %u < %d\n", *len, 1162 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1211 (unsigned int)sizeof(get));
1212 return -EINVAL; 1163 return -EINVAL;
1213 } 1164 }
1214 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1165 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1215 return -EFAULT; 1166 return -EFAULT;
1216 if (*len != sizeof(struct ipt_get_entries) + get.size) { 1167 if (*len != sizeof(struct ipt_get_entries) + get.size) {
1217 duprintf("get_entries: %u != %u\n", *len, 1168 duprintf("get_entries: %u != %zu\n",
1218 (unsigned int)(sizeof(struct ipt_get_entries) + 1169 *len, sizeof(get) + get.size);
1219 get.size));
1220 return -EINVAL; 1170 return -EINVAL;
1221 } 1171 }
1222 1172
1223 t = xt_find_table_lock(AF_INET, get.name); 1173 t = xt_find_table_lock(net, AF_INET, get.name);
1224 if (t && !IS_ERR(t)) { 1174 if (t && !IS_ERR(t)) {
1225 struct xt_table_info *private = t->private; 1175 struct xt_table_info *private = t->private;
1226 duprintf("t->private->number = %u\n", 1176 duprintf("t->private->number = %u\n", private->number);
1227 private->number);
1228 if (get.size == private->size) 1177 if (get.size == private->size)
1229 ret = copy_entries_to_user(private->size, 1178 ret = copy_entries_to_user(private->size,
1230 t, uptr->entrytable); 1179 t, uptr->entrytable);
1231 else { 1180 else {
1232 duprintf("get_entries: I've got %u not %u!\n", 1181 duprintf("get_entries: I've got %u not %u!\n",
1233 private->size, 1182 private->size, get.size);
1234 get.size);
1235 ret = -EINVAL; 1183 ret = -EINVAL;
1236 } 1184 }
1237 module_put(t->me); 1185 module_put(t->me);
@@ -1243,9 +1191,9 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
1243} 1191}
1244 1192
1245static int 1193static int
1246__do_replace(const char *name, unsigned int valid_hooks, 1194__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1247 struct xt_table_info *newinfo, unsigned int num_counters, 1195 struct xt_table_info *newinfo, unsigned int num_counters,
1248 void __user *counters_ptr) 1196 void __user *counters_ptr)
1249{ 1197{
1250 int ret; 1198 int ret;
1251 struct xt_table *t; 1199 struct xt_table *t;
@@ -1260,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
1260 goto out; 1208 goto out;
1261 } 1209 }
1262 1210
1263 t = try_then_request_module(xt_find_table_lock(AF_INET, name), 1211 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1264 "iptable_%s", name); 1212 "iptable_%s", name);
1265 if (!t || IS_ERR(t)) { 1213 if (!t || IS_ERR(t)) {
1266 ret = t ? PTR_ERR(t) : -ENOENT; 1214 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1293,7 +1241,8 @@ __do_replace(const char *name, unsigned int valid_hooks,
1293 get_counters(oldinfo, counters); 1241 get_counters(oldinfo, counters);
1294 /* Decrease module usage counts and free resource */ 1242 /* Decrease module usage counts and free resource */
1295 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1243 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1296 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); 1244 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1245 NULL);
1297 xt_free_table_info(oldinfo); 1246 xt_free_table_info(oldinfo);
1298 if (copy_to_user(counters_ptr, counters, 1247 if (copy_to_user(counters_ptr, counters,
1299 sizeof(struct xt_counters) * num_counters) != 0) 1248 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1312,7 +1261,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
1312} 1261}
1313 1262
1314static int 1263static int
1315do_replace(void __user *user, unsigned int len) 1264do_replace(struct net *net, void __user *user, unsigned int len)
1316{ 1265{
1317 int ret; 1266 int ret;
1318 struct ipt_replace tmp; 1267 struct ipt_replace tmp;
@@ -1322,14 +1271,7 @@ do_replace(void __user *user, unsigned int len)
1322 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1271 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1323 return -EFAULT; 1272 return -EFAULT;
1324 1273
1325 /* Hack: Causes ipchains to give correct error msg --RR */
1326 if (len != sizeof(tmp) + tmp.size)
1327 return -ENOPROTOOPT;
1328
1329 /* overflow check */ 1274 /* overflow check */
1330 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
1331 SMP_CACHE_BYTES)
1332 return -ENOMEM;
1333 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1275 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1334 return -ENOMEM; 1276 return -ENOMEM;
1335 1277
@@ -1337,7 +1279,7 @@ do_replace(void __user *user, unsigned int len)
1337 if (!newinfo) 1279 if (!newinfo)
1338 return -ENOMEM; 1280 return -ENOMEM;
1339 1281
1340 /* choose the copy that is our node/cpu */ 1282 /* choose the copy that is on our node/cpu */
1341 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 1283 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1342 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1284 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1343 tmp.size) != 0) { 1285 tmp.size) != 0) {
@@ -1353,15 +1295,14 @@ do_replace(void __user *user, unsigned int len)
1353 1295
1354 duprintf("ip_tables: Translated table\n"); 1296 duprintf("ip_tables: Translated table\n");
1355 1297
1356 ret = __do_replace(tmp.name, tmp.valid_hooks, 1298 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1357 newinfo, tmp.num_counters, 1299 tmp.num_counters, tmp.counters);
1358 tmp.counters);
1359 if (ret) 1300 if (ret)
1360 goto free_newinfo_untrans; 1301 goto free_newinfo_untrans;
1361 return 0; 1302 return 0;
1362 1303
1363 free_newinfo_untrans: 1304 free_newinfo_untrans:
1364 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); 1305 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1365 free_newinfo: 1306 free_newinfo:
1366 xt_free_table_info(newinfo); 1307 xt_free_table_info(newinfo);
1367 return ret; 1308 return ret;
@@ -1369,7 +1310,7 @@ do_replace(void __user *user, unsigned int len)
1369 1310
1370/* We're lazy, and add to the first CPU; overflow works its fey magic 1311/* We're lazy, and add to the first CPU; overflow works its fey magic
1371 * and everything is OK. */ 1312 * and everything is OK. */
1372static inline int 1313static int
1373add_counter_to_entry(struct ipt_entry *e, 1314add_counter_to_entry(struct ipt_entry *e,
1374 const struct xt_counters addme[], 1315 const struct xt_counters addme[],
1375 unsigned int *i) 1316 unsigned int *i)
@@ -1390,7 +1331,7 @@ add_counter_to_entry(struct ipt_entry *e,
1390} 1331}
1391 1332
1392static int 1333static int
1393do_add_counters(void __user *user, unsigned int len, int compat) 1334do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1394{ 1335{
1395 unsigned int i; 1336 unsigned int i;
1396 struct xt_counters_info tmp; 1337 struct xt_counters_info tmp;
@@ -1442,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat)
1442 goto free; 1383 goto free;
1443 } 1384 }
1444 1385
1445 t = xt_find_table_lock(AF_INET, name); 1386 t = xt_find_table_lock(net, AF_INET, name);
1446 if (!t || IS_ERR(t)) { 1387 if (!t || IS_ERR(t)) {
1447 ret = t ? PTR_ERR(t) : -ENOENT; 1388 ret = t ? PTR_ERR(t) : -ENOENT;
1448 goto free; 1389 goto free;
@@ -1479,21 +1420,17 @@ struct compat_ipt_replace {
1479 u32 valid_hooks; 1420 u32 valid_hooks;
1480 u32 num_entries; 1421 u32 num_entries;
1481 u32 size; 1422 u32 size;
1482 u32 hook_entry[NF_IP_NUMHOOKS]; 1423 u32 hook_entry[NF_INET_NUMHOOKS];
1483 u32 underflow[NF_IP_NUMHOOKS]; 1424 u32 underflow[NF_INET_NUMHOOKS];
1484 u32 num_counters; 1425 u32 num_counters;
1485 compat_uptr_t counters; /* struct ipt_counters * */ 1426 compat_uptr_t counters; /* struct ipt_counters * */
1486 struct compat_ipt_entry entries[0]; 1427 struct compat_ipt_entry entries[0];
1487}; 1428};
1488 1429
1489static inline int compat_copy_match_to_user(struct ipt_entry_match *m, 1430static int
1490 void __user **dstptr, compat_uint_t *size) 1431compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1491{ 1432 unsigned int *size, struct xt_counters *counters,
1492 return xt_compat_match_to_user(m, dstptr, size); 1433 unsigned int *i)
1493}
1494
1495static int compat_copy_entry_to_user(struct ipt_entry *e,
1496 void __user **dstptr, compat_uint_t *size)
1497{ 1434{
1498 struct ipt_entry_target *t; 1435 struct ipt_entry_target *t;
1499 struct compat_ipt_entry __user *ce; 1436 struct compat_ipt_entry __user *ce;
@@ -1507,8 +1444,13 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
1507 if (copy_to_user(ce, e, sizeof(struct ipt_entry))) 1444 if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1508 goto out; 1445 goto out;
1509 1446
1447 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1448 goto out;
1449
1510 *dstptr += sizeof(struct compat_ipt_entry); 1450 *dstptr += sizeof(struct compat_ipt_entry);
1511 ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size); 1451 *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1452
1453 ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1512 target_offset = e->target_offset - (origsize - *size); 1454 target_offset = e->target_offset - (origsize - *size);
1513 if (ret) 1455 if (ret)
1514 goto out; 1456 goto out;
@@ -1522,26 +1464,28 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
1522 goto out; 1464 goto out;
1523 if (put_user(next_offset, &ce->next_offset)) 1465 if (put_user(next_offset, &ce->next_offset))
1524 goto out; 1466 goto out;
1467
1468 (*i)++;
1525 return 0; 1469 return 0;
1526out: 1470out:
1527 return ret; 1471 return ret;
1528} 1472}
1529 1473
1530static inline int 1474static int
1531compat_find_calc_match(struct ipt_entry_match *m, 1475compat_find_calc_match(struct ipt_entry_match *m,
1532 const char *name, 1476 const char *name,
1533 const struct ipt_ip *ip, 1477 const struct ipt_ip *ip,
1534 unsigned int hookmask, 1478 unsigned int hookmask,
1535 int *size, int *i) 1479 int *size, unsigned int *i)
1536{ 1480{
1537 struct xt_match *match; 1481 struct xt_match *match;
1538 1482
1539 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 1483 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1540 m->u.user.revision), 1484 m->u.user.revision),
1541 "ipt_%s", m->u.user.name); 1485 "ipt_%s", m->u.user.name);
1542 if (IS_ERR(match) || !match) { 1486 if (IS_ERR(match) || !match) {
1543 duprintf("compat_check_calc_match: `%s' not found\n", 1487 duprintf("compat_check_calc_match: `%s' not found\n",
1544 m->u.user.name); 1488 m->u.user.name);
1545 return match ? PTR_ERR(match) : -ENOENT; 1489 return match ? PTR_ERR(match) : -ENOENT;
1546 } 1490 }
1547 m->u.kernel.match = match; 1491 m->u.kernel.match = match;
@@ -1551,7 +1495,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
1551 return 0; 1495 return 0;
1552} 1496}
1553 1497
1554static inline int 1498static int
1555compat_release_match(struct ipt_entry_match *m, unsigned int *i) 1499compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1556{ 1500{
1557 if (i && (*i)-- == 0) 1501 if (i && (*i)-- == 0)
@@ -1561,8 +1505,8 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1561 return 0; 1505 return 0;
1562} 1506}
1563 1507
1564static inline int 1508static int
1565compat_release_entry(struct ipt_entry *e, unsigned int *i) 1509compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1566{ 1510{
1567 struct ipt_entry_target *t; 1511 struct ipt_entry_target *t;
1568 1512
@@ -1570,27 +1514,28 @@ compat_release_entry(struct ipt_entry *e, unsigned int *i)
1570 return 1; 1514 return 1;
1571 1515
1572 /* Cleanup all matches */ 1516 /* Cleanup all matches */
1573 IPT_MATCH_ITERATE(e, compat_release_match, NULL); 1517 COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1574 t = ipt_get_target(e); 1518 t = compat_ipt_get_target(e);
1575 module_put(t->u.kernel.target->me); 1519 module_put(t->u.kernel.target->me);
1576 return 0; 1520 return 0;
1577} 1521}
1578 1522
1579static inline int 1523static int
1580check_compat_entry_size_and_hooks(struct ipt_entry *e, 1524check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1581 struct xt_table_info *newinfo, 1525 struct xt_table_info *newinfo,
1582 unsigned int *size, 1526 unsigned int *size,
1583 unsigned char *base, 1527 unsigned char *base,
1584 unsigned char *limit, 1528 unsigned char *limit,
1585 unsigned int *hook_entries, 1529 unsigned int *hook_entries,
1586 unsigned int *underflows, 1530 unsigned int *underflows,
1587 unsigned int *i, 1531 unsigned int *i,
1588 const char *name) 1532 const char *name)
1589{ 1533{
1590 struct ipt_entry_target *t; 1534 struct ipt_entry_target *t;
1591 struct xt_target *target; 1535 struct xt_target *target;
1592 unsigned int entry_offset; 1536 unsigned int entry_offset;
1593 int ret, off, h, j; 1537 unsigned int j;
1538 int ret, off, h;
1594 1539
1595 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1540 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1596 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 1541 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
@@ -1600,32 +1545,33 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1600 } 1545 }
1601 1546
1602 if (e->next_offset < sizeof(struct compat_ipt_entry) + 1547 if (e->next_offset < sizeof(struct compat_ipt_entry) +
1603 sizeof(struct compat_xt_entry_target)) { 1548 sizeof(struct compat_xt_entry_target)) {
1604 duprintf("checking: element %p size %u\n", 1549 duprintf("checking: element %p size %u\n",
1605 e, e->next_offset); 1550 e, e->next_offset);
1606 return -EINVAL; 1551 return -EINVAL;
1607 } 1552 }
1608 1553
1609 ret = check_entry(e, name); 1554 /* For purposes of check_entry casting the compat entry is fine */
1555 ret = check_entry((struct ipt_entry *)e, name);
1610 if (ret) 1556 if (ret)
1611 return ret; 1557 return ret;
1612 1558
1613 off = 0; 1559 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1614 entry_offset = (void *)e - (void *)base; 1560 entry_offset = (void *)e - (void *)base;
1615 j = 0; 1561 j = 0;
1616 ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip, 1562 ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1617 e->comefrom, &off, &j); 1563 &e->ip, e->comefrom, &off, &j);
1618 if (ret != 0) 1564 if (ret != 0)
1619 goto release_matches; 1565 goto release_matches;
1620 1566
1621 t = ipt_get_target(e); 1567 t = compat_ipt_get_target(e);
1622 target = try_then_request_module(xt_find_target(AF_INET, 1568 target = try_then_request_module(xt_find_target(AF_INET,
1623 t->u.user.name, 1569 t->u.user.name,
1624 t->u.user.revision), 1570 t->u.user.revision),
1625 "ipt_%s", t->u.user.name); 1571 "ipt_%s", t->u.user.name);
1626 if (IS_ERR(target) || !target) { 1572 if (IS_ERR(target) || !target) {
1627 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", 1573 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1628 t->u.user.name); 1574 t->u.user.name);
1629 ret = target ? PTR_ERR(target) : -ENOENT; 1575 ret = target ? PTR_ERR(target) : -ENOENT;
1630 goto release_matches; 1576 goto release_matches;
1631 } 1577 }
@@ -1633,12 +1579,12 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1633 1579
1634 off += xt_compat_target_offset(target); 1580 off += xt_compat_target_offset(target);
1635 *size += off; 1581 *size += off;
1636 ret = compat_add_offset(entry_offset, off); 1582 ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1637 if (ret) 1583 if (ret)
1638 goto out; 1584 goto out;
1639 1585
1640 /* Check hooks & underflows */ 1586 /* Check hooks & underflows */
1641 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 1587 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1642 if ((unsigned char *)e - base == hook_entries[h]) 1588 if ((unsigned char *)e - base == hook_entries[h])
1643 newinfo->hook_entry[h] = hook_entries[h]; 1589 newinfo->hook_entry[h] = hook_entries[h];
1644 if ((unsigned char *)e - base == underflows[h]) 1590 if ((unsigned char *)e - base == underflows[h])
@@ -1646,7 +1592,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1646 } 1592 }
1647 1593
1648 /* Clear counters and comefrom */ 1594 /* Clear counters and comefrom */
1649 e->counters = ((struct ipt_counters) { 0, 0 }); 1595 memset(&e->counters, 0, sizeof(e->counters));
1650 e->comefrom = 0; 1596 e->comefrom = 0;
1651 1597
1652 (*i)++; 1598 (*i)++;
@@ -1659,17 +1605,10 @@ release_matches:
1659 return ret; 1605 return ret;
1660} 1606}
1661 1607
1662static inline int compat_copy_match_from_user(struct ipt_entry_match *m, 1608static int
1663 void **dstptr, compat_uint_t *size, const char *name, 1609compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1664 const struct ipt_ip *ip, unsigned int hookmask) 1610 unsigned int *size, const char *name,
1665{ 1611 struct xt_table_info *newinfo, unsigned char *base)
1666 xt_compat_match_from_user(m, dstptr, size);
1667 return 0;
1668}
1669
1670static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1671 unsigned int *size, const char *name,
1672 struct xt_table_info *newinfo, unsigned char *base)
1673{ 1612{
1674 struct ipt_entry_target *t; 1613 struct ipt_entry_target *t;
1675 struct xt_target *target; 1614 struct xt_target *target;
@@ -1681,19 +1620,22 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1681 origsize = *size; 1620 origsize = *size;
1682 de = (struct ipt_entry *)*dstptr; 1621 de = (struct ipt_entry *)*dstptr;
1683 memcpy(de, e, sizeof(struct ipt_entry)); 1622 memcpy(de, e, sizeof(struct ipt_entry));
1623 memcpy(&de->counters, &e->counters, sizeof(e->counters));
1684 1624
1685 *dstptr += sizeof(struct compat_ipt_entry); 1625 *dstptr += sizeof(struct ipt_entry);
1686 ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, 1626 *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1687 name, &de->ip, de->comefrom); 1627
1628 ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1629 dstptr, size);
1688 if (ret) 1630 if (ret)
1689 return ret; 1631 return ret;
1690 de->target_offset = e->target_offset - (origsize - *size); 1632 de->target_offset = e->target_offset - (origsize - *size);
1691 t = ipt_get_target(e); 1633 t = compat_ipt_get_target(e);
1692 target = t->u.kernel.target; 1634 target = t->u.kernel.target;
1693 xt_compat_target_from_user(t, dstptr, size); 1635 xt_compat_target_from_user(t, dstptr, size);
1694 1636
1695 de->next_offset = e->next_offset - (origsize - *size); 1637 de->next_offset = e->next_offset - (origsize - *size);
1696 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 1638 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1697 if ((unsigned char *)de - base < newinfo->hook_entry[h]) 1639 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1698 newinfo->hook_entry[h] -= origsize - *size; 1640 newinfo->hook_entry[h] -= origsize - *size;
1699 if ((unsigned char *)de - base < newinfo->underflow[h]) 1641 if ((unsigned char *)de - base < newinfo->underflow[h])
@@ -1702,13 +1644,16 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1702 return ret; 1644 return ret;
1703} 1645}
1704 1646
1705static inline int compat_check_entry(struct ipt_entry *e, const char *name, 1647static int
1706 unsigned int *i) 1648compat_check_entry(struct ipt_entry *e, const char *name,
1649 unsigned int *i)
1707{ 1650{
1708 int j, ret; 1651 unsigned int j;
1652 int ret;
1709 1653
1710 j = 0; 1654 j = 0;
1711 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); 1655 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
1656 e->comefrom, &j);
1712 if (ret) 1657 if (ret)
1713 goto cleanup_matches; 1658 goto cleanup_matches;
1714 1659
@@ -1726,13 +1671,13 @@ static inline int compat_check_entry(struct ipt_entry *e, const char *name,
1726 1671
1727static int 1672static int
1728translate_compat_table(const char *name, 1673translate_compat_table(const char *name,
1729 unsigned int valid_hooks, 1674 unsigned int valid_hooks,
1730 struct xt_table_info **pinfo, 1675 struct xt_table_info **pinfo,
1731 void **pentry0, 1676 void **pentry0,
1732 unsigned int total_size, 1677 unsigned int total_size,
1733 unsigned int number, 1678 unsigned int number,
1734 unsigned int *hook_entries, 1679 unsigned int *hook_entries,
1735 unsigned int *underflows) 1680 unsigned int *underflows)
1736{ 1681{
1737 unsigned int i, j; 1682 unsigned int i, j;
1738 struct xt_table_info *newinfo, *info; 1683 struct xt_table_info *newinfo, *info;
@@ -1746,7 +1691,7 @@ translate_compat_table(const char *name,
1746 info->number = number; 1691 info->number = number;
1747 1692
1748 /* Init all hooks to impossible value. */ 1693 /* Init all hooks to impossible value. */
1749 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1694 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1750 info->hook_entry[i] = 0xFFFFFFFF; 1695 info->hook_entry[i] = 0xFFFFFFFF;
1751 info->underflow[i] = 0xFFFFFFFF; 1696 info->underflow[i] = 0xFFFFFFFF;
1752 } 1697 }
@@ -1755,11 +1700,11 @@ translate_compat_table(const char *name,
1755 j = 0; 1700 j = 0;
1756 xt_compat_lock(AF_INET); 1701 xt_compat_lock(AF_INET);
1757 /* Walk through entries, checking offsets. */ 1702 /* Walk through entries, checking offsets. */
1758 ret = IPT_ENTRY_ITERATE(entry0, total_size, 1703 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1759 check_compat_entry_size_and_hooks, 1704 check_compat_entry_size_and_hooks,
1760 info, &size, entry0, 1705 info, &size, entry0,
1761 entry0 + total_size, 1706 entry0 + total_size,
1762 hook_entries, underflows, &j, name); 1707 hook_entries, underflows, &j, name);
1763 if (ret != 0) 1708 if (ret != 0)
1764 goto out_unlock; 1709 goto out_unlock;
1765 1710
@@ -1771,7 +1716,7 @@ translate_compat_table(const char *name,
1771 } 1716 }
1772 1717
1773 /* Check hooks all assigned */ 1718 /* Check hooks all assigned */
1774 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1719 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1775 /* Only hooks which are valid */ 1720 /* Only hooks which are valid */
1776 if (!(valid_hooks & (1 << i))) 1721 if (!(valid_hooks & (1 << i)))
1777 continue; 1722 continue;
@@ -1793,17 +1738,17 @@ translate_compat_table(const char *name,
1793 goto out_unlock; 1738 goto out_unlock;
1794 1739
1795 newinfo->number = number; 1740 newinfo->number = number;
1796 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1741 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1797 newinfo->hook_entry[i] = info->hook_entry[i]; 1742 newinfo->hook_entry[i] = info->hook_entry[i];
1798 newinfo->underflow[i] = info->underflow[i]; 1743 newinfo->underflow[i] = info->underflow[i];
1799 } 1744 }
1800 entry1 = newinfo->entries[raw_smp_processor_id()]; 1745 entry1 = newinfo->entries[raw_smp_processor_id()];
1801 pos = entry1; 1746 pos = entry1;
1802 size = total_size; 1747 size = total_size;
1803 ret = IPT_ENTRY_ITERATE(entry0, total_size, 1748 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1804 compat_copy_entry_from_user, &pos, &size, 1749 compat_copy_entry_from_user,
1805 name, newinfo, entry1); 1750 &pos, &size, name, newinfo, entry1);
1806 compat_flush_offsets(); 1751 xt_compat_flush_offsets(AF_INET);
1807 xt_compat_unlock(AF_INET); 1752 xt_compat_unlock(AF_INET);
1808 if (ret) 1753 if (ret)
1809 goto free_newinfo; 1754 goto free_newinfo;
@@ -1814,11 +1759,11 @@ translate_compat_table(const char *name,
1814 1759
1815 i = 0; 1760 i = 0;
1816 ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, 1761 ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1817 name, &i); 1762 name, &i);
1818 if (ret) { 1763 if (ret) {
1819 j -= i; 1764 j -= i;
1820 IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i, 1765 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1821 compat_release_entry, &j); 1766 compat_release_entry, &j);
1822 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); 1767 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1823 xt_free_table_info(newinfo); 1768 xt_free_table_info(newinfo);
1824 return ret; 1769 return ret;
@@ -1837,16 +1782,16 @@ translate_compat_table(const char *name,
1837free_newinfo: 1782free_newinfo:
1838 xt_free_table_info(newinfo); 1783 xt_free_table_info(newinfo);
1839out: 1784out:
1840 IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); 1785 COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1841 return ret; 1786 return ret;
1842out_unlock: 1787out_unlock:
1843 compat_flush_offsets(); 1788 xt_compat_flush_offsets(AF_INET);
1844 xt_compat_unlock(AF_INET); 1789 xt_compat_unlock(AF_INET);
1845 goto out; 1790 goto out;
1846} 1791}
1847 1792
1848static int 1793static int
1849compat_do_replace(void __user *user, unsigned int len) 1794compat_do_replace(struct net *net, void __user *user, unsigned int len)
1850{ 1795{
1851 int ret; 1796 int ret;
1852 struct compat_ipt_replace tmp; 1797 struct compat_ipt_replace tmp;
@@ -1856,13 +1801,8 @@ compat_do_replace(void __user *user, unsigned int len)
1856 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1801 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1857 return -EFAULT; 1802 return -EFAULT;
1858 1803
1859 /* Hack: Causes ipchains to give correct error msg --RR */
1860 if (len != sizeof(tmp) + tmp.size)
1861 return -ENOPROTOOPT;
1862
1863 /* overflow check */ 1804 /* overflow check */
1864 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - 1805 if (tmp.size >= INT_MAX / num_possible_cpus())
1865 SMP_CACHE_BYTES)
1866 return -ENOMEM; 1806 return -ENOMEM;
1867 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1807 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1868 return -ENOMEM; 1808 return -ENOMEM;
@@ -1871,7 +1811,7 @@ compat_do_replace(void __user *user, unsigned int len)
1871 if (!newinfo) 1811 if (!newinfo)
1872 return -ENOMEM; 1812 return -ENOMEM;
1873 1813
1874 /* choose the copy that is our node/cpu */ 1814 /* choose the copy that is on our node/cpu */
1875 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 1815 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1876 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1816 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1877 tmp.size) != 0) { 1817 tmp.size) != 0) {
@@ -1880,22 +1820,22 @@ compat_do_replace(void __user *user, unsigned int len)
1880 } 1820 }
1881 1821
1882 ret = translate_compat_table(tmp.name, tmp.valid_hooks, 1822 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1883 &newinfo, &loc_cpu_entry, tmp.size, 1823 &newinfo, &loc_cpu_entry, tmp.size,
1884 tmp.num_entries, tmp.hook_entry, tmp.underflow); 1824 tmp.num_entries, tmp.hook_entry,
1825 tmp.underflow);
1885 if (ret != 0) 1826 if (ret != 0)
1886 goto free_newinfo; 1827 goto free_newinfo;
1887 1828
1888 duprintf("compat_do_replace: Translated table\n"); 1829 duprintf("compat_do_replace: Translated table\n");
1889 1830
1890 ret = __do_replace(tmp.name, tmp.valid_hooks, 1831 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1891 newinfo, tmp.num_counters, 1832 tmp.num_counters, compat_ptr(tmp.counters));
1892 compat_ptr(tmp.counters));
1893 if (ret) 1833 if (ret)
1894 goto free_newinfo_untrans; 1834 goto free_newinfo_untrans;
1895 return 0; 1835 return 0;
1896 1836
1897 free_newinfo_untrans: 1837 free_newinfo_untrans:
1898 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); 1838 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1899 free_newinfo: 1839 free_newinfo:
1900 xt_free_table_info(newinfo); 1840 xt_free_table_info(newinfo);
1901 return ret; 1841 return ret;
@@ -1903,7 +1843,7 @@ compat_do_replace(void __user *user, unsigned int len)
1903 1843
1904static int 1844static int
1905compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, 1845compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1906 unsigned int len) 1846 unsigned int len)
1907{ 1847{
1908 int ret; 1848 int ret;
1909 1849
@@ -1912,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1912 1852
1913 switch (cmd) { 1853 switch (cmd) {
1914 case IPT_SO_SET_REPLACE: 1854 case IPT_SO_SET_REPLACE:
1915 ret = compat_do_replace(user, len); 1855 ret = compat_do_replace(sk->sk_net, user, len);
1916 break; 1856 break;
1917 1857
1918 case IPT_SO_SET_ADD_COUNTERS: 1858 case IPT_SO_SET_ADD_COUNTERS:
1919 ret = do_add_counters(user, len, 1); 1859 ret = do_add_counters(sk->sk_net, user, len, 1);
1920 break; 1860 break;
1921 1861
1922 default: 1862 default:
@@ -1927,24 +1867,23 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1927 return ret; 1867 return ret;
1928} 1868}
1929 1869
1930struct compat_ipt_get_entries 1870struct compat_ipt_get_entries {
1931{
1932 char name[IPT_TABLE_MAXNAMELEN]; 1871 char name[IPT_TABLE_MAXNAMELEN];
1933 compat_uint_t size; 1872 compat_uint_t size;
1934 struct compat_ipt_entry entrytable[0]; 1873 struct compat_ipt_entry entrytable[0];
1935}; 1874};
1936 1875
1937static int compat_copy_entries_to_user(unsigned int total_size, 1876static int
1938 struct xt_table *table, void __user *userptr) 1877compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1878 void __user *userptr)
1939{ 1879{
1940 unsigned int off, num;
1941 struct compat_ipt_entry e;
1942 struct xt_counters *counters; 1880 struct xt_counters *counters;
1943 struct xt_table_info *private = table->private; 1881 struct xt_table_info *private = table->private;
1944 void __user *pos; 1882 void __user *pos;
1945 unsigned int size; 1883 unsigned int size;
1946 int ret = 0; 1884 int ret = 0;
1947 void *loc_cpu_entry; 1885 void *loc_cpu_entry;
1886 unsigned int i = 0;
1948 1887
1949 counters = alloc_counters(table); 1888 counters = alloc_counters(table);
1950 if (IS_ERR(counters)) 1889 if (IS_ERR(counters))
@@ -1958,63 +1897,23 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1958 pos = userptr; 1897 pos = userptr;
1959 size = total_size; 1898 size = total_size;
1960 ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, 1899 ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1961 compat_copy_entry_to_user, &pos, &size); 1900 compat_copy_entry_to_user,
1962 if (ret) 1901 &pos, &size, counters, &i);
1963 goto free_counters;
1964
1965 /* ... then go back and fix counters and names */
1966 for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
1967 unsigned int i;
1968 struct ipt_entry_match m;
1969 struct ipt_entry_target t;
1970 1902
1971 ret = -EFAULT;
1972 if (copy_from_user(&e, userptr + off,
1973 sizeof(struct compat_ipt_entry)))
1974 goto free_counters;
1975 if (copy_to_user(userptr + off +
1976 offsetof(struct compat_ipt_entry, counters),
1977 &counters[num], sizeof(counters[num])))
1978 goto free_counters;
1979
1980 for (i = sizeof(struct compat_ipt_entry);
1981 i < e.target_offset; i += m.u.match_size) {
1982 if (copy_from_user(&m, userptr + off + i,
1983 sizeof(struct ipt_entry_match)))
1984 goto free_counters;
1985 if (copy_to_user(userptr + off + i +
1986 offsetof(struct ipt_entry_match, u.user.name),
1987 m.u.kernel.match->name,
1988 strlen(m.u.kernel.match->name) + 1))
1989 goto free_counters;
1990 }
1991
1992 if (copy_from_user(&t, userptr + off + e.target_offset,
1993 sizeof(struct ipt_entry_target)))
1994 goto free_counters;
1995 if (copy_to_user(userptr + off + e.target_offset +
1996 offsetof(struct ipt_entry_target, u.user.name),
1997 t.u.kernel.target->name,
1998 strlen(t.u.kernel.target->name) + 1))
1999 goto free_counters;
2000 }
2001 ret = 0;
2002free_counters:
2003 vfree(counters); 1903 vfree(counters);
2004 return ret; 1904 return ret;
2005} 1905}
2006 1906
2007static int 1907static int
2008compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) 1908compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1909 int *len)
2009{ 1910{
2010 int ret; 1911 int ret;
2011 struct compat_ipt_get_entries get; 1912 struct compat_ipt_get_entries get;
2012 struct xt_table *t; 1913 struct xt_table *t;
2013 1914
2014
2015 if (*len < sizeof(get)) { 1915 if (*len < sizeof(get)) {
2016 duprintf("compat_get_entries: %u < %u\n", 1916 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
2017 *len, (unsigned int)sizeof(get));
2018 return -EINVAL; 1917 return -EINVAL;
2019 } 1918 }
2020 1919
@@ -2022,30 +1921,27 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
2022 return -EFAULT; 1921 return -EFAULT;
2023 1922
2024 if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { 1923 if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
2025 duprintf("compat_get_entries: %u != %u\n", *len, 1924 duprintf("compat_get_entries: %u != %zu\n",
2026 (unsigned int)(sizeof(struct compat_ipt_get_entries) + 1925 *len, sizeof(get) + get.size);
2027 get.size));
2028 return -EINVAL; 1926 return -EINVAL;
2029 } 1927 }
2030 1928
2031 xt_compat_lock(AF_INET); 1929 xt_compat_lock(AF_INET);
2032 t = xt_find_table_lock(AF_INET, get.name); 1930 t = xt_find_table_lock(net, AF_INET, get.name);
2033 if (t && !IS_ERR(t)) { 1931 if (t && !IS_ERR(t)) {
2034 struct xt_table_info *private = t->private; 1932 struct xt_table_info *private = t->private;
2035 struct xt_table_info info; 1933 struct xt_table_info info;
2036 duprintf("t->private->number = %u\n", 1934 duprintf("t->private->number = %u\n", private->number);
2037 private->number);
2038 ret = compat_table_info(private, &info); 1935 ret = compat_table_info(private, &info);
2039 if (!ret && get.size == info.size) { 1936 if (!ret && get.size == info.size) {
2040 ret = compat_copy_entries_to_user(private->size, 1937 ret = compat_copy_entries_to_user(private->size,
2041 t, uptr->entrytable); 1938 t, uptr->entrytable);
2042 } else if (!ret) { 1939 } else if (!ret) {
2043 duprintf("compat_get_entries: I've got %u not %u!\n", 1940 duprintf("compat_get_entries: I've got %u not %u!\n",
2044 private->size, 1941 private->size, get.size);
2045 get.size);
2046 ret = -EINVAL; 1942 ret = -EINVAL;
2047 } 1943 }
2048 compat_flush_offsets(); 1944 xt_compat_flush_offsets(AF_INET);
2049 module_put(t->me); 1945 module_put(t->me);
2050 xt_table_unlock(t); 1946 xt_table_unlock(t);
2051 } else 1947 } else
@@ -2067,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2067 1963
2068 switch (cmd) { 1964 switch (cmd) {
2069 case IPT_SO_GET_INFO: 1965 case IPT_SO_GET_INFO:
2070 ret = get_info(user, len, 1); 1966 ret = get_info(sk->sk_net, user, len, 1);
2071 break; 1967 break;
2072 case IPT_SO_GET_ENTRIES: 1968 case IPT_SO_GET_ENTRIES:
2073 ret = compat_get_entries(user, len); 1969 ret = compat_get_entries(sk->sk_net, user, len);
2074 break; 1970 break;
2075 default: 1971 default:
2076 ret = do_ipt_get_ctl(sk, cmd, user, len); 1972 ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -2080,7 +1976,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2080#endif 1976#endif
2081 1977
2082static int 1978static int
2083do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1979do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2084{ 1980{
2085 int ret; 1981 int ret;
2086 1982
@@ -2089,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2089 1985
2090 switch (cmd) { 1986 switch (cmd) {
2091 case IPT_SO_SET_REPLACE: 1987 case IPT_SO_SET_REPLACE:
2092 ret = do_replace(user, len); 1988 ret = do_replace(sk->sk_net, user, len);
2093 break; 1989 break;
2094 1990
2095 case IPT_SO_SET_ADD_COUNTERS: 1991 case IPT_SO_SET_ADD_COUNTERS:
2096 ret = do_add_counters(user, len, 0); 1992 ret = do_add_counters(sk->sk_net, user, len, 0);
2097 break; 1993 break;
2098 1994
2099 default: 1995 default:
@@ -2114,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2114 2010
2115 switch (cmd) { 2011 switch (cmd) {
2116 case IPT_SO_GET_INFO: 2012 case IPT_SO_GET_INFO:
2117 ret = get_info(user, len, 0); 2013 ret = get_info(sk->sk_net, user, len, 0);
2118 break; 2014 break;
2119 2015
2120 case IPT_SO_GET_ENTRIES: 2016 case IPT_SO_GET_ENTRIES:
2121 ret = get_entries(user, len); 2017 ret = get_entries(sk->sk_net, user, len);
2122 break; 2018 break;
2123 2019
2124 case IPT_SO_GET_REVISION_MATCH: 2020 case IPT_SO_GET_REVISION_MATCH:
@@ -2155,21 +2051,23 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2155 return ret; 2051 return ret;
2156} 2052}
2157 2053
2158int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) 2054struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2055 const struct ipt_replace *repl)
2159{ 2056{
2160 int ret; 2057 int ret;
2161 struct xt_table_info *newinfo; 2058 struct xt_table_info *newinfo;
2162 static struct xt_table_info bootstrap 2059 struct xt_table_info bootstrap
2163 = { 0, 0, 0, { 0 }, { 0 }, { } }; 2060 = { 0, 0, 0, { 0 }, { 0 }, { } };
2164 void *loc_cpu_entry; 2061 void *loc_cpu_entry;
2062 struct xt_table *new_table;
2165 2063
2166 newinfo = xt_alloc_table_info(repl->size); 2064 newinfo = xt_alloc_table_info(repl->size);
2167 if (!newinfo) 2065 if (!newinfo) {
2168 return -ENOMEM; 2066 ret = -ENOMEM;
2067 goto out;
2068 }
2169 2069
2170 /* choose the copy on our node/cpu 2070 /* choose the copy on our node/cpu, but dont care about preemption */
2171 * but dont care of preemption
2172 */
2173 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2071 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2174 memcpy(loc_cpu_entry, repl->entries, repl->size); 2072 memcpy(loc_cpu_entry, repl->entries, repl->size);
2175 2073
@@ -2178,30 +2076,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
2178 repl->num_entries, 2076 repl->num_entries,
2179 repl->hook_entry, 2077 repl->hook_entry,
2180 repl->underflow); 2078 repl->underflow);
2181 if (ret != 0) { 2079 if (ret != 0)
2182 xt_free_table_info(newinfo); 2080 goto out_free;
2183 return ret;
2184 }
2185 2081
2186 ret = xt_register_table(table, &bootstrap, newinfo); 2082 new_table = xt_register_table(net, table, &bootstrap, newinfo);
2187 if (ret != 0) { 2083 if (IS_ERR(new_table)) {
2188 xt_free_table_info(newinfo); 2084 ret = PTR_ERR(new_table);
2189 return ret; 2085 goto out_free;
2190 } 2086 }
2191 2087
2192 return 0; 2088 return new_table;
2089
2090out_free:
2091 xt_free_table_info(newinfo);
2092out:
2093 return ERR_PTR(ret);
2193} 2094}
2194 2095
2195void ipt_unregister_table(struct xt_table *table) 2096void ipt_unregister_table(struct xt_table *table)
2196{ 2097{
2197 struct xt_table_info *private; 2098 struct xt_table_info *private;
2198 void *loc_cpu_entry; 2099 void *loc_cpu_entry;
2100 struct module *table_owner = table->me;
2199 2101
2200 private = xt_unregister_table(table); 2102 private = xt_unregister_table(table);
2201 2103
2202 /* Decrease module usage counts and free resources */ 2104 /* Decrease module usage counts and free resources */
2203 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2105 loc_cpu_entry = private->entries[raw_smp_processor_id()];
2204 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); 2106 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2107 if (private->number > private->initial_entries)
2108 module_put(table_owner);
2205 xt_free_table_info(private); 2109 xt_free_table_info(private);
2206} 2110}
2207 2111
@@ -2211,7 +2115,8 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2211 u_int8_t type, u_int8_t code, 2115 u_int8_t type, u_int8_t code,
2212 bool invert) 2116 bool invert)
2213{ 2117{
2214 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code)) 2118 return ((test_type == 0xFF) ||
2119 (type == test_type && code >= min_code && code <= max_code))
2215 ^ invert; 2120 ^ invert;
2216} 2121}
2217 2122
@@ -2252,7 +2157,7 @@ icmp_match(const struct sk_buff *skb,
2252/* Called when user tries to insert an entry of this type. */ 2157/* Called when user tries to insert an entry of this type. */
2253static bool 2158static bool
2254icmp_checkentry(const char *tablename, 2159icmp_checkentry(const char *tablename,
2255 const void *info, 2160 const void *entry,
2256 const struct xt_match *match, 2161 const struct xt_match *match,
2257 void *matchinfo, 2162 void *matchinfo,
2258 unsigned int hook_mask) 2163 unsigned int hook_mask)
@@ -2303,16 +2208,31 @@ static struct xt_match icmp_matchstruct __read_mostly = {
2303 .name = "icmp", 2208 .name = "icmp",
2304 .match = icmp_match, 2209 .match = icmp_match,
2305 .matchsize = sizeof(struct ipt_icmp), 2210 .matchsize = sizeof(struct ipt_icmp),
2211 .checkentry = icmp_checkentry,
2306 .proto = IPPROTO_ICMP, 2212 .proto = IPPROTO_ICMP,
2307 .family = AF_INET, 2213 .family = AF_INET,
2308 .checkentry = icmp_checkentry, 2214};
2215
2216static int __net_init ip_tables_net_init(struct net *net)
2217{
2218 return xt_proto_init(net, AF_INET);
2219}
2220
2221static void __net_exit ip_tables_net_exit(struct net *net)
2222{
2223 xt_proto_fini(net, AF_INET);
2224}
2225
2226static struct pernet_operations ip_tables_net_ops = {
2227 .init = ip_tables_net_init,
2228 .exit = ip_tables_net_exit,
2309}; 2229};
2310 2230
2311static int __init ip_tables_init(void) 2231static int __init ip_tables_init(void)
2312{ 2232{
2313 int ret; 2233 int ret;
2314 2234
2315 ret = xt_proto_init(AF_INET); 2235 ret = register_pernet_subsys(&ip_tables_net_ops);
2316 if (ret < 0) 2236 if (ret < 0)
2317 goto err1; 2237 goto err1;
2318 2238
@@ -2342,7 +2262,7 @@ err4:
2342err3: 2262err3:
2343 xt_unregister_target(&ipt_standard_target); 2263 xt_unregister_target(&ipt_standard_target);
2344err2: 2264err2:
2345 xt_proto_fini(AF_INET); 2265 unregister_pernet_subsys(&ip_tables_net_ops);
2346err1: 2266err1:
2347 return ret; 2267 return ret;
2348} 2268}
@@ -2355,7 +2275,7 @@ static void __exit ip_tables_fini(void)
2355 xt_unregister_target(&ipt_error_target); 2275 xt_unregister_target(&ipt_error_target);
2356 xt_unregister_target(&ipt_standard_target); 2276 xt_unregister_target(&ipt_standard_target);
2357 2277
2358 xt_proto_fini(AF_INET); 2278 unregister_pernet_subsys(&ip_tables_net_ops);
2359} 2279}
2360 2280
2361EXPORT_SYMBOL(ipt_register_table); 2281EXPORT_SYMBOL(ipt_register_table);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2f544dac72df..c6cf84c77611 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -32,7 +32,7 @@
32 32
33MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 34MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
35MODULE_DESCRIPTION("iptables target for CLUSTERIP"); 35MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
36 36
37struct clusterip_config { 37struct clusterip_config {
38 struct list_head list; /* list of all configs */ 38 struct list_head list; /* list of all configs */
@@ -76,13 +76,6 @@ clusterip_config_put(struct clusterip_config *c)
76 kfree(c); 76 kfree(c);
77} 77}
78 78
79/* increase the count of entries(rules) using/referencing this config */
80static inline void
81clusterip_config_entry_get(struct clusterip_config *c)
82{
83 atomic_inc(&c->entries);
84}
85
86/* decrease the count of entries using/referencing this config. If last 79/* decrease the count of entries using/referencing this config. If last
87 * entry(rule) is removed, remove the config from lists, but don't free it 80 * entry(rule) is removed, remove the config from lists, but don't free it
88 * yet, since proc-files could still be holding references */ 81 * yet, since proc-files could still be holding references */
@@ -109,11 +102,9 @@ clusterip_config_entry_put(struct clusterip_config *c)
109static struct clusterip_config * 102static struct clusterip_config *
110__clusterip_config_find(__be32 clusterip) 103__clusterip_config_find(__be32 clusterip)
111{ 104{
112 struct list_head *pos; 105 struct clusterip_config *c;
113 106
114 list_for_each(pos, &clusterip_configs) { 107 list_for_each_entry(c, &clusterip_configs, list) {
115 struct clusterip_config *c = list_entry(pos,
116 struct clusterip_config, list);
117 if (c->clusterip == clusterip) 108 if (c->clusterip == clusterip)
118 return c; 109 return c;
119 } 110 }
@@ -275,7 +266,7 @@ clusterip_hashfn(const struct sk_buff *skb,
275 } 266 }
276 267
277 /* node numbers are 1..n, not 0..n */ 268 /* node numbers are 1..n, not 0..n */
278 return (hashval % config->num_total_nodes) + 1; 269 return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
279} 270}
280 271
281static inline int 272static inline int
@@ -289,12 +280,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
289 ***********************************************************************/ 280 ***********************************************************************/
290 281
291static unsigned int 282static unsigned int
292target(struct sk_buff *skb, 283clusterip_tg(struct sk_buff *skb, const struct net_device *in,
293 const struct net_device *in, 284 const struct net_device *out, unsigned int hooknum,
294 const struct net_device *out, 285 const struct xt_target *target, const void *targinfo)
295 unsigned int hooknum,
296 const struct xt_target *target,
297 const void *targinfo)
298{ 286{
299 const struct ipt_clusterip_tgt_info *cipinfo = targinfo; 287 const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
300 struct nf_conn *ct; 288 struct nf_conn *ct;
@@ -361,11 +349,9 @@ target(struct sk_buff *skb,
361} 349}
362 350
363static bool 351static bool
364checkentry(const char *tablename, 352clusterip_tg_check(const char *tablename, const void *e_void,
365 const void *e_void, 353 const struct xt_target *target, void *targinfo,
366 const struct xt_target *target, 354 unsigned int hook_mask)
367 void *targinfo,
368 unsigned int hook_mask)
369{ 355{
370 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 356 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
371 const struct ipt_entry *e = e_void; 357 const struct ipt_entry *e = e_void;
@@ -421,7 +407,7 @@ checkentry(const char *tablename,
421 407
422 if (nf_ct_l3proto_try_module_get(target->family) < 0) { 408 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
423 printk(KERN_WARNING "can't load conntrack support for " 409 printk(KERN_WARNING "can't load conntrack support for "
424 "proto=%d\n", target->family); 410 "proto=%u\n", target->family);
425 return false; 411 return false;
426 } 412 }
427 413
@@ -429,7 +415,7 @@ checkentry(const char *tablename,
429} 415}
430 416
431/* drop reference count of cluster config when rule is deleted */ 417/* drop reference count of cluster config when rule is deleted */
432static void destroy(const struct xt_target *target, void *targinfo) 418static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
433{ 419{
434 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 420 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
435 421
@@ -456,12 +442,12 @@ struct compat_ipt_clusterip_tgt_info
456}; 442};
457#endif /* CONFIG_COMPAT */ 443#endif /* CONFIG_COMPAT */
458 444
459static struct xt_target clusterip_tgt __read_mostly = { 445static struct xt_target clusterip_tg_reg __read_mostly = {
460 .name = "CLUSTERIP", 446 .name = "CLUSTERIP",
461 .family = AF_INET, 447 .family = AF_INET,
462 .target = target, 448 .target = clusterip_tg,
463 .checkentry = checkentry, 449 .checkentry = clusterip_tg_check,
464 .destroy = destroy, 450 .destroy = clusterip_tg_destroy,
465 .targetsize = sizeof(struct ipt_clusterip_tgt_info), 451 .targetsize = sizeof(struct ipt_clusterip_tgt_info),
466#ifdef CONFIG_COMPAT 452#ifdef CONFIG_COMPAT
467 .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), 453 .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
@@ -558,7 +544,7 @@ arp_mangle(unsigned int hook,
558 return NF_ACCEPT; 544 return NF_ACCEPT;
559} 545}
560 546
561static struct nf_hook_ops cip_arp_ops = { 547static struct nf_hook_ops cip_arp_ops __read_mostly = {
562 .hook = arp_mangle, 548 .hook = arp_mangle,
563 .pf = NF_ARP, 549 .pf = NF_ARP,
564 .hooknum = NF_ARP_OUT, 550 .hooknum = NF_ARP_OUT,
@@ -714,11 +700,11 @@ static const struct file_operations clusterip_proc_fops = {
714 700
715#endif /* CONFIG_PROC_FS */ 701#endif /* CONFIG_PROC_FS */
716 702
717static int __init ipt_clusterip_init(void) 703static int __init clusterip_tg_init(void)
718{ 704{
719 int ret; 705 int ret;
720 706
721 ret = xt_register_target(&clusterip_tgt); 707 ret = xt_register_target(&clusterip_tg_reg);
722 if (ret < 0) 708 if (ret < 0)
723 return ret; 709 return ret;
724 710
@@ -744,11 +730,11 @@ cleanup_hook:
744 nf_unregister_hook(&cip_arp_ops); 730 nf_unregister_hook(&cip_arp_ops);
745#endif /* CONFIG_PROC_FS */ 731#endif /* CONFIG_PROC_FS */
746cleanup_target: 732cleanup_target:
747 xt_unregister_target(&clusterip_tgt); 733 xt_unregister_target(&clusterip_tg_reg);
748 return ret; 734 return ret;
749} 735}
750 736
751static void __exit ipt_clusterip_fini(void) 737static void __exit clusterip_tg_exit(void)
752{ 738{
753 printk(KERN_NOTICE "ClusterIP Version %s unloading\n", 739 printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
754 CLUSTERIP_VERSION); 740 CLUSTERIP_VERSION);
@@ -756,8 +742,8 @@ static void __exit ipt_clusterip_fini(void)
756 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); 742 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
757#endif 743#endif
758 nf_unregister_hook(&cip_arp_ops); 744 nf_unregister_hook(&cip_arp_ops);
759 xt_unregister_target(&clusterip_tgt); 745 xt_unregister_target(&clusterip_tg_reg);
760} 746}
761 747
762module_init(ipt_clusterip_init); 748module_init(clusterip_tg_init);
763module_exit(ipt_clusterip_fini); 749module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index add110060a22..21395bc2b27f 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -21,7 +21,7 @@
21 21
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_DESCRIPTION("iptables ECN modification module"); 24MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
25 25
26/* set ECT codepoint from IP header. 26/* set ECT codepoint from IP header.
27 * return false if there was an error. */ 27 * return false if there was an error. */
@@ -38,7 +38,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
38 oldtos = iph->tos; 38 oldtos = iph->tos;
39 iph->tos &= ~IPT_ECN_IP_MASK; 39 iph->tos &= ~IPT_ECN_IP_MASK;
40 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); 40 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
41 nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); 41 csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
42 } 42 }
43 return true; 43 return true;
44} 44}
@@ -71,18 +71,15 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
71 if (einfo->operation & IPT_ECN_OP_SET_CWR) 71 if (einfo->operation & IPT_ECN_OP_SET_CWR)
72 tcph->cwr = einfo->proto.tcp.cwr; 72 tcph->cwr = einfo->proto.tcp.cwr;
73 73
74 nf_proto_csum_replace2(&tcph->check, skb, 74 inet_proto_csum_replace2(&tcph->check, skb,
75 oldval, ((__be16 *)tcph)[6], 0); 75 oldval, ((__be16 *)tcph)[6], 0);
76 return true; 76 return true;
77} 77}
78 78
79static unsigned int 79static unsigned int
80target(struct sk_buff *skb, 80ecn_tg(struct sk_buff *skb, const struct net_device *in,
81 const struct net_device *in, 81 const struct net_device *out, unsigned int hooknum,
82 const struct net_device *out, 82 const struct xt_target *target, const void *targinfo)
83 unsigned int hooknum,
84 const struct xt_target *target,
85 const void *targinfo)
86{ 83{
87 const struct ipt_ECN_info *einfo = targinfo; 84 const struct ipt_ECN_info *einfo = targinfo;
88 85
@@ -99,11 +96,9 @@ target(struct sk_buff *skb,
99} 96}
100 97
101static bool 98static bool
102checkentry(const char *tablename, 99ecn_tg_check(const char *tablename, const void *e_void,
103 const void *e_void, 100 const struct xt_target *target, void *targinfo,
104 const struct xt_target *target, 101 unsigned int hook_mask)
105 void *targinfo,
106 unsigned int hook_mask)
107{ 102{
108 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 103 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
109 const struct ipt_entry *e = e_void; 104 const struct ipt_entry *e = e_void;
@@ -127,25 +122,25 @@ checkentry(const char *tablename,
127 return true; 122 return true;
128} 123}
129 124
130static struct xt_target ipt_ecn_reg __read_mostly = { 125static struct xt_target ecn_tg_reg __read_mostly = {
131 .name = "ECN", 126 .name = "ECN",
132 .family = AF_INET, 127 .family = AF_INET,
133 .target = target, 128 .target = ecn_tg,
134 .targetsize = sizeof(struct ipt_ECN_info), 129 .targetsize = sizeof(struct ipt_ECN_info),
135 .table = "mangle", 130 .table = "mangle",
136 .checkentry = checkentry, 131 .checkentry = ecn_tg_check,
137 .me = THIS_MODULE, 132 .me = THIS_MODULE,
138}; 133};
139 134
140static int __init ipt_ecn_init(void) 135static int __init ecn_tg_init(void)
141{ 136{
142 return xt_register_target(&ipt_ecn_reg); 137 return xt_register_target(&ecn_tg_reg);
143} 138}
144 139
145static void __exit ipt_ecn_fini(void) 140static void __exit ecn_tg_exit(void)
146{ 141{
147 xt_unregister_target(&ipt_ecn_reg); 142 xt_unregister_target(&ecn_tg_reg);
148} 143}
149 144
150module_init(ipt_ecn_init); 145module_init(ecn_tg_init);
151module_exit(ipt_ecn_fini); 146module_exit(ecn_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 4b5e8216a4e7..b38d7850f506 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -22,10 +22,11 @@
22#include <linux/netfilter.h> 22#include <linux/netfilter.h>
23#include <linux/netfilter/x_tables.h> 23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv4/ipt_LOG.h> 24#include <linux/netfilter_ipv4/ipt_LOG.h>
25#include <net/netfilter/nf_log.h>
25 26
26MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 28MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("iptables syslog logging module"); 29MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
29 30
30/* Use lock to serialize, so printks don't overlap */ 31/* Use lock to serialize, so printks don't overlap */
31static DEFINE_SPINLOCK(log_lock); 32static DEFINE_SPINLOCK(log_lock);
@@ -337,7 +338,9 @@ static void dump_packet(const struct nf_loginfo *info,
337 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { 338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
338 read_lock_bh(&skb->sk->sk_callback_lock); 339 read_lock_bh(&skb->sk->sk_callback_lock);
339 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 340 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
340 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 341 printk("UID=%u GID=%u",
342 skb->sk->sk_socket->file->f_uid,
343 skb->sk->sk_socket->file->f_gid);
341 read_unlock_bh(&skb->sk->sk_callback_lock); 344 read_unlock_bh(&skb->sk->sk_callback_lock);
342 } 345 }
343 346
@@ -418,12 +421,9 @@ ipt_log_packet(unsigned int pf,
418} 421}
419 422
420static unsigned int 423static unsigned int
421ipt_log_target(struct sk_buff *skb, 424log_tg(struct sk_buff *skb, const struct net_device *in,
422 const struct net_device *in, 425 const struct net_device *out, unsigned int hooknum,
423 const struct net_device *out, 426 const struct xt_target *target, const void *targinfo)
424 unsigned int hooknum,
425 const struct xt_target *target,
426 const void *targinfo)
427{ 427{
428 const struct ipt_log_info *loginfo = targinfo; 428 const struct ipt_log_info *loginfo = targinfo;
429 struct nf_loginfo li; 429 struct nf_loginfo li;
@@ -437,11 +437,10 @@ ipt_log_target(struct sk_buff *skb,
437 return XT_CONTINUE; 437 return XT_CONTINUE;
438} 438}
439 439
440static bool ipt_log_checkentry(const char *tablename, 440static bool
441 const void *e, 441log_tg_check(const char *tablename, const void *e,
442 const struct xt_target *target, 442 const struct xt_target *target, void *targinfo,
443 void *targinfo, 443 unsigned int hook_mask)
444 unsigned int hook_mask)
445{ 444{
446 const struct ipt_log_info *loginfo = targinfo; 445 const struct ipt_log_info *loginfo = targinfo;
447 446
@@ -457,37 +456,37 @@ static bool ipt_log_checkentry(const char *tablename,
457 return true; 456 return true;
458} 457}
459 458
460static struct xt_target ipt_log_reg __read_mostly = { 459static struct xt_target log_tg_reg __read_mostly = {
461 .name = "LOG", 460 .name = "LOG",
462 .family = AF_INET, 461 .family = AF_INET,
463 .target = ipt_log_target, 462 .target = log_tg,
464 .targetsize = sizeof(struct ipt_log_info), 463 .targetsize = sizeof(struct ipt_log_info),
465 .checkentry = ipt_log_checkentry, 464 .checkentry = log_tg_check,
466 .me = THIS_MODULE, 465 .me = THIS_MODULE,
467}; 466};
468 467
469static struct nf_logger ipt_log_logger ={ 468static const struct nf_logger ipt_log_logger ={
470 .name = "ipt_LOG", 469 .name = "ipt_LOG",
471 .logfn = &ipt_log_packet, 470 .logfn = &ipt_log_packet,
472 .me = THIS_MODULE, 471 .me = THIS_MODULE,
473}; 472};
474 473
475static int __init ipt_log_init(void) 474static int __init log_tg_init(void)
476{ 475{
477 int ret; 476 int ret;
478 477
479 ret = xt_register_target(&ipt_log_reg); 478 ret = xt_register_target(&log_tg_reg);
480 if (ret < 0) 479 if (ret < 0)
481 return ret; 480 return ret;
482 nf_log_register(PF_INET, &ipt_log_logger); 481 nf_log_register(PF_INET, &ipt_log_logger);
483 return 0; 482 return 0;
484} 483}
485 484
486static void __exit ipt_log_fini(void) 485static void __exit log_tg_exit(void)
487{ 486{
488 nf_log_unregister(&ipt_log_logger); 487 nf_log_unregister(&ipt_log_logger);
489 xt_unregister_target(&ipt_log_reg); 488 xt_unregister_target(&log_tg_reg);
490} 489}
491 490
492module_init(ipt_log_init); 491module_init(log_tg_init);
493module_exit(ipt_log_fini); 492module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 44b516e7cb79..d80fee8327e4 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -25,18 +25,16 @@
25 25
26MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("iptables MASQUERADE target module"); 28MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
29 29
30/* Lock protects masq region inside conntrack */ 30/* Lock protects masq region inside conntrack */
31static DEFINE_RWLOCK(masq_lock); 31static DEFINE_RWLOCK(masq_lock);
32 32
33/* FIXME: Multiple targets. --RR */ 33/* FIXME: Multiple targets. --RR */
34static bool 34static bool
35masquerade_check(const char *tablename, 35masquerade_tg_check(const char *tablename, const void *e,
36 const void *e, 36 const struct xt_target *target, void *targinfo,
37 const struct xt_target *target, 37 unsigned int hook_mask)
38 void *targinfo,
39 unsigned int hook_mask)
40{ 38{
41 const struct nf_nat_multi_range_compat *mr = targinfo; 39 const struct nf_nat_multi_range_compat *mr = targinfo;
42 40
@@ -52,12 +50,9 @@ masquerade_check(const char *tablename,
52} 50}
53 51
54static unsigned int 52static unsigned int
55masquerade_target(struct sk_buff *skb, 53masquerade_tg(struct sk_buff *skb, const struct net_device *in,
56 const struct net_device *in, 54 const struct net_device *out, unsigned int hooknum,
57 const struct net_device *out, 55 const struct xt_target *target, const void *targinfo)
58 unsigned int hooknum,
59 const struct xt_target *target,
60 const void *targinfo)
61{ 56{
62 struct nf_conn *ct; 57 struct nf_conn *ct;
63 struct nf_conn_nat *nat; 58 struct nf_conn_nat *nat;
@@ -67,7 +62,7 @@ masquerade_target(struct sk_buff *skb,
67 const struct rtable *rt; 62 const struct rtable *rt;
68 __be32 newsrc; 63 __be32 newsrc;
69 64
70 NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); 65 NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
71 66
72 ct = nf_ct_get(skb, &ctinfo); 67 ct = nf_ct_get(skb, &ctinfo);
73 nat = nfct_nat(ct); 68 nat = nfct_nat(ct);
@@ -100,7 +95,7 @@ masquerade_target(struct sk_buff *skb,
100 mr->range[0].min, mr->range[0].max }); 95 mr->range[0].min, mr->range[0].max });
101 96
102 /* Hand modified range to generic setup. */ 97 /* Hand modified range to generic setup. */
103 return nf_nat_setup_info(ct, &newrange, hooknum); 98 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
104} 99}
105 100
106static int 101static int
@@ -166,22 +161,22 @@ static struct notifier_block masq_inet_notifier = {
166 .notifier_call = masq_inet_event, 161 .notifier_call = masq_inet_event,
167}; 162};
168 163
169static struct xt_target masquerade __read_mostly = { 164static struct xt_target masquerade_tg_reg __read_mostly = {
170 .name = "MASQUERADE", 165 .name = "MASQUERADE",
171 .family = AF_INET, 166 .family = AF_INET,
172 .target = masquerade_target, 167 .target = masquerade_tg,
173 .targetsize = sizeof(struct nf_nat_multi_range_compat), 168 .targetsize = sizeof(struct nf_nat_multi_range_compat),
174 .table = "nat", 169 .table = "nat",
175 .hooks = 1 << NF_IP_POST_ROUTING, 170 .hooks = 1 << NF_INET_POST_ROUTING,
176 .checkentry = masquerade_check, 171 .checkentry = masquerade_tg_check,
177 .me = THIS_MODULE, 172 .me = THIS_MODULE,
178}; 173};
179 174
180static int __init ipt_masquerade_init(void) 175static int __init masquerade_tg_init(void)
181{ 176{
182 int ret; 177 int ret;
183 178
184 ret = xt_register_target(&masquerade); 179 ret = xt_register_target(&masquerade_tg_reg);
185 180
186 if (ret == 0) { 181 if (ret == 0) {
187 /* Register for device down reports */ 182 /* Register for device down reports */
@@ -193,12 +188,12 @@ static int __init ipt_masquerade_init(void)
193 return ret; 188 return ret;
194} 189}
195 190
196static void __exit ipt_masquerade_fini(void) 191static void __exit masquerade_tg_exit(void)
197{ 192{
198 xt_unregister_target(&masquerade); 193 xt_unregister_target(&masquerade_tg_reg);
199 unregister_netdevice_notifier(&masq_dev_notifier); 194 unregister_netdevice_notifier(&masq_dev_notifier);
200 unregister_inetaddr_notifier(&masq_inet_notifier); 195 unregister_inetaddr_notifier(&masq_inet_notifier);
201} 196}
202 197
203module_init(ipt_masquerade_init); 198module_init(masquerade_tg_init);
204module_exit(ipt_masquerade_fini); 199module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f8699291e33d..6739abfd1521 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -20,14 +20,12 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); 22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
23MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); 23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24 24
25static bool 25static bool
26check(const char *tablename, 26netmap_tg_check(const char *tablename, const void *e,
27 const void *e, 27 const struct xt_target *target, void *targinfo,
28 const struct xt_target *target, 28 unsigned int hook_mask)
29 void *targinfo,
30 unsigned int hook_mask)
31{ 29{
32 const struct nf_nat_multi_range_compat *mr = targinfo; 30 const struct nf_nat_multi_range_compat *mr = targinfo;
33 31
@@ -43,12 +41,9 @@ check(const char *tablename,
43} 41}
44 42
45static unsigned int 43static unsigned int
46target(struct sk_buff *skb, 44netmap_tg(struct sk_buff *skb, const struct net_device *in,
47 const struct net_device *in, 45 const struct net_device *out, unsigned int hooknum,
48 const struct net_device *out, 46 const struct xt_target *target, const void *targinfo)
49 unsigned int hooknum,
50 const struct xt_target *target,
51 const void *targinfo)
52{ 47{
53 struct nf_conn *ct; 48 struct nf_conn *ct;
54 enum ip_conntrack_info ctinfo; 49 enum ip_conntrack_info ctinfo;
@@ -56,14 +51,14 @@ target(struct sk_buff *skb,
56 const struct nf_nat_multi_range_compat *mr = targinfo; 51 const struct nf_nat_multi_range_compat *mr = targinfo;
57 struct nf_nat_range newrange; 52 struct nf_nat_range newrange;
58 53
59 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING 54 NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
60 || hooknum == NF_IP_POST_ROUTING 55 || hooknum == NF_INET_POST_ROUTING
61 || hooknum == NF_IP_LOCAL_OUT); 56 || hooknum == NF_INET_LOCAL_OUT);
62 ct = nf_ct_get(skb, &ctinfo); 57 ct = nf_ct_get(skb, &ctinfo);
63 58
64 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 59 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
65 60
66 if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) 61 if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT)
67 new_ip = ip_hdr(skb)->daddr & ~netmask; 62 new_ip = ip_hdr(skb)->daddr & ~netmask;
68 else 63 else
69 new_ip = ip_hdr(skb)->saddr & ~netmask; 64 new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -75,30 +70,31 @@ target(struct sk_buff *skb,
75 mr->range[0].min, mr->range[0].max }); 70 mr->range[0].min, mr->range[0].max });
76 71
77 /* Hand modified range to generic setup. */ 72 /* Hand modified range to generic setup. */
78 return nf_nat_setup_info(ct, &newrange, hooknum); 73 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum));
79} 74}
80 75
81static struct xt_target target_module __read_mostly = { 76static struct xt_target netmap_tg_reg __read_mostly = {
82 .name = "NETMAP", 77 .name = "NETMAP",
83 .family = AF_INET, 78 .family = AF_INET,
84 .target = target, 79 .target = netmap_tg,
85 .targetsize = sizeof(struct nf_nat_multi_range_compat), 80 .targetsize = sizeof(struct nf_nat_multi_range_compat),
86 .table = "nat", 81 .table = "nat",
87 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | 82 .hooks = (1 << NF_INET_PRE_ROUTING) |
88 (1 << NF_IP_LOCAL_OUT), 83 (1 << NF_INET_POST_ROUTING) |
89 .checkentry = check, 84 (1 << NF_INET_LOCAL_OUT),
85 .checkentry = netmap_tg_check,
90 .me = THIS_MODULE 86 .me = THIS_MODULE
91}; 87};
92 88
93static int __init ipt_netmap_init(void) 89static int __init netmap_tg_init(void)
94{ 90{
95 return xt_register_target(&target_module); 91 return xt_register_target(&netmap_tg_reg);
96} 92}
97 93
98static void __exit ipt_netmap_fini(void) 94static void __exit netmap_tg_exit(void)
99{ 95{
100 xt_unregister_target(&target_module); 96 xt_unregister_target(&netmap_tg_reg);
101} 97}
102 98
103module_init(ipt_netmap_init); 99module_init(netmap_tg_init);
104module_exit(ipt_netmap_fini); 100module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f7cf7d61a2d4..5c6292449d13 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -23,15 +23,13 @@
23 23
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
26MODULE_DESCRIPTION("iptables REDIRECT target module"); 26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
27 27
28/* FIXME: Take multiple ranges --RR */ 28/* FIXME: Take multiple ranges --RR */
29static bool 29static bool
30redirect_check(const char *tablename, 30redirect_tg_check(const char *tablename, const void *e,
31 const void *e, 31 const struct xt_target *target, void *targinfo,
32 const struct xt_target *target, 32 unsigned int hook_mask)
33 void *targinfo,
34 unsigned int hook_mask)
35{ 33{
36 const struct nf_nat_multi_range_compat *mr = targinfo; 34 const struct nf_nat_multi_range_compat *mr = targinfo;
37 35
@@ -47,12 +45,9 @@ redirect_check(const char *tablename,
47} 45}
48 46
49static unsigned int 47static unsigned int
50redirect_target(struct sk_buff *skb, 48redirect_tg(struct sk_buff *skb, const struct net_device *in,
51 const struct net_device *in, 49 const struct net_device *out, unsigned int hooknum,
52 const struct net_device *out, 50 const struct xt_target *target, const void *targinfo)
53 unsigned int hooknum,
54 const struct xt_target *target,
55 const void *targinfo)
56{ 51{
57 struct nf_conn *ct; 52 struct nf_conn *ct;
58 enum ip_conntrack_info ctinfo; 53 enum ip_conntrack_info ctinfo;
@@ -60,14 +55,14 @@ redirect_target(struct sk_buff *skb,
60 const struct nf_nat_multi_range_compat *mr = targinfo; 55 const struct nf_nat_multi_range_compat *mr = targinfo;
61 struct nf_nat_range newrange; 56 struct nf_nat_range newrange;
62 57
63 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING 58 NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
64 || hooknum == NF_IP_LOCAL_OUT); 59 || hooknum == NF_INET_LOCAL_OUT);
65 60
66 ct = nf_ct_get(skb, &ctinfo); 61 ct = nf_ct_get(skb, &ctinfo);
67 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); 62 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
68 63
69 /* Local packets: make them go to loopback */ 64 /* Local packets: make them go to loopback */
70 if (hooknum == NF_IP_LOCAL_OUT) 65 if (hooknum == NF_INET_LOCAL_OUT)
71 newdst = htonl(0x7F000001); 66 newdst = htonl(0x7F000001);
72 else { 67 else {
73 struct in_device *indev; 68 struct in_device *indev;
@@ -92,29 +87,29 @@ redirect_target(struct sk_buff *skb,
92 mr->range[0].min, mr->range[0].max }); 87 mr->range[0].min, mr->range[0].max });
93 88
94 /* Hand modified range to generic setup. */ 89 /* Hand modified range to generic setup. */
95 return nf_nat_setup_info(ct, &newrange, hooknum); 90 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
96} 91}
97 92
98static struct xt_target redirect_reg __read_mostly = { 93static struct xt_target redirect_tg_reg __read_mostly = {
99 .name = "REDIRECT", 94 .name = "REDIRECT",
100 .family = AF_INET, 95 .family = AF_INET,
101 .target = redirect_target, 96 .target = redirect_tg,
102 .targetsize = sizeof(struct nf_nat_multi_range_compat), 97 .targetsize = sizeof(struct nf_nat_multi_range_compat),
103 .table = "nat", 98 .table = "nat",
104 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), 99 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
105 .checkentry = redirect_check, 100 .checkentry = redirect_tg_check,
106 .me = THIS_MODULE, 101 .me = THIS_MODULE,
107}; 102};
108 103
109static int __init ipt_redirect_init(void) 104static int __init redirect_tg_init(void)
110{ 105{
111 return xt_register_target(&redirect_reg); 106 return xt_register_target(&redirect_tg_reg);
112} 107}
113 108
114static void __exit ipt_redirect_fini(void) 109static void __exit redirect_tg_exit(void)
115{ 110{
116 xt_unregister_target(&redirect_reg); 111 xt_unregister_target(&redirect_tg_reg);
117} 112}
118 113
119module_init(ipt_redirect_init); 114module_init(redirect_tg_init);
120module_exit(ipt_redirect_fini); 115module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index dcf4d21d5116..22606e2baa16 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -29,17 +29,14 @@
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
32MODULE_DESCRIPTION("iptables REJECT target module"); 32MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
33 33
34/* Send RST reply */ 34/* Send RST reply */
35static void send_reset(struct sk_buff *oldskb, int hook) 35static void send_reset(struct sk_buff *oldskb, int hook)
36{ 36{
37 struct sk_buff *nskb; 37 struct sk_buff *nskb;
38 struct iphdr *niph; 38 struct iphdr *oiph, *niph;
39 struct tcphdr _otcph, *oth, *tcph; 39 struct tcphdr _otcph, *oth, *tcph;
40 __be16 tmp_port;
41 __be32 tmp_addr;
42 int needs_ack;
43 unsigned int addr_type; 40 unsigned int addr_type;
44 41
45 /* IP header checks: fragment. */ 42 /* IP header checks: fragment. */
@@ -58,99 +55,73 @@ static void send_reset(struct sk_buff *oldskb, int hook)
58 /* Check checksum */ 55 /* Check checksum */
59 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) 56 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
60 return; 57 return;
58 oiph = ip_hdr(oldskb);
61 59
62 /* We need a linear, writeable skb. We also need to expand 60 nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
63 headroom in case hh_len of incoming interface < hh_len of 61 LL_MAX_HEADER, GFP_ATOMIC);
64 outgoing interface */
65 nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
66 GFP_ATOMIC);
67 if (!nskb) 62 if (!nskb)
68 return; 63 return;
69 64
70 /* This packet will not be the same as the other: clear nf fields */ 65 skb_reserve(nskb, LL_MAX_HEADER);
71 nf_reset(nskb); 66
72 nskb->mark = 0; 67 skb_reset_network_header(nskb);
73 skb_init_secmark(nskb); 68 niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
74 69 niph->version = 4;
75 skb_shinfo(nskb)->gso_size = 0; 70 niph->ihl = sizeof(struct iphdr) / 4;
76 skb_shinfo(nskb)->gso_segs = 0; 71 niph->tos = 0;
77 skb_shinfo(nskb)->gso_type = 0; 72 niph->id = 0;
78 73 niph->frag_off = htons(IP_DF);
79 tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb)); 74 niph->protocol = IPPROTO_TCP;
80 75 niph->check = 0;
81 /* Swap source and dest */ 76 niph->saddr = oiph->daddr;
82 niph = ip_hdr(nskb); 77 niph->daddr = oiph->saddr;
83 tmp_addr = niph->saddr; 78
84 niph->saddr = niph->daddr; 79 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
85 niph->daddr = tmp_addr; 80 memset(tcph, 0, sizeof(*tcph));
86 tmp_port = tcph->source; 81 tcph->source = oth->dest;
87 tcph->source = tcph->dest; 82 tcph->dest = oth->source;
88 tcph->dest = tmp_port; 83 tcph->doff = sizeof(struct tcphdr) / 4;
89 84
90 /* Truncate to length (no data) */ 85 if (oth->ack)
91 tcph->doff = sizeof(struct tcphdr)/4;
92 skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
93 niph->tot_len = htons(nskb->len);
94
95 if (tcph->ack) {
96 needs_ack = 0;
97 tcph->seq = oth->ack_seq; 86 tcph->seq = oth->ack_seq;
98 tcph->ack_seq = 0; 87 else {
99 } else {
100 needs_ack = 1;
101 tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + 88 tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
102 oldskb->len - ip_hdrlen(oldskb) - 89 oldskb->len - ip_hdrlen(oldskb) -
103 (oth->doff << 2)); 90 (oth->doff << 2));
104 tcph->seq = 0; 91 tcph->ack = 1;
105 } 92 }
106 93
107 /* Reset flags */ 94 tcph->rst = 1;
108 ((u_int8_t *)tcph)[13] = 0; 95 tcph->check = tcp_v4_check(sizeof(struct tcphdr),
109 tcph->rst = 1; 96 niph->saddr, niph->daddr,
110 tcph->ack = needs_ack; 97 csum_partial(tcph,
111 98 sizeof(struct tcphdr), 0));
112 tcph->window = 0;
113 tcph->urg_ptr = 0;
114
115 /* Adjust TCP checksum */
116 tcph->check = 0;
117 tcph->check = tcp_v4_check(sizeof(struct tcphdr),
118 niph->saddr, niph->daddr,
119 csum_partial(tcph,
120 sizeof(struct tcphdr), 0));
121
122 /* Set DF, id = 0 */
123 niph->frag_off = htons(IP_DF);
124 niph->id = 0;
125 99
126 addr_type = RTN_UNSPEC; 100 addr_type = RTN_UNSPEC;
127 if (hook != NF_IP_FORWARD 101 if (hook != NF_INET_FORWARD
128#ifdef CONFIG_BRIDGE_NETFILTER 102#ifdef CONFIG_BRIDGE_NETFILTER
129 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) 103 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
130#endif 104#endif
131 ) 105 )
132 addr_type = RTN_LOCAL; 106 addr_type = RTN_LOCAL;
133 107
108 /* ip_route_me_harder expects skb->dst to be set */
109 dst_hold(oldskb->dst);
110 nskb->dst = oldskb->dst;
111
134 if (ip_route_me_harder(nskb, addr_type)) 112 if (ip_route_me_harder(nskb, addr_type))
135 goto free_nskb; 113 goto free_nskb;
136 114
115 niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
137 nskb->ip_summed = CHECKSUM_NONE; 116 nskb->ip_summed = CHECKSUM_NONE;
138 117
139 /* Adjust IP TTL */
140 niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
141
142 /* Adjust IP checksum */
143 niph->check = 0;
144 niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
145
146 /* "Never happens" */ 118 /* "Never happens" */
147 if (nskb->len > dst_mtu(nskb->dst)) 119 if (nskb->len > dst_mtu(nskb->dst))
148 goto free_nskb; 120 goto free_nskb;
149 121
150 nf_ct_attach(nskb, oldskb); 122 nf_ct_attach(nskb, oldskb);
151 123
152 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev, 124 ip_local_out(nskb);
153 dst_output);
154 return; 125 return;
155 126
156 free_nskb: 127 free_nskb:
@@ -162,20 +133,13 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
162 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); 133 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
163} 134}
164 135
165static unsigned int reject(struct sk_buff *skb, 136static unsigned int
166 const struct net_device *in, 137reject_tg(struct sk_buff *skb, const struct net_device *in,
167 const struct net_device *out, 138 const struct net_device *out, unsigned int hooknum,
168 unsigned int hooknum, 139 const struct xt_target *target, const void *targinfo)
169 const struct xt_target *target,
170 const void *targinfo)
171{ 140{
172 const struct ipt_reject_info *reject = targinfo; 141 const struct ipt_reject_info *reject = targinfo;
173 142
174 /* Our naive response construction doesn't deal with IP
175 options, and probably shouldn't try. */
176 if (ip_hdrlen(skb) != sizeof(struct iphdr))
177 return NF_DROP;
178
179 /* WARNING: This code causes reentry within iptables. 143 /* WARNING: This code causes reentry within iptables.
180 This means that the iptables jump stack is now crap. We 144 This means that the iptables jump stack is now crap. We
181 must return an absolute verdict. --RR */ 145 must return an absolute verdict. --RR */
@@ -211,11 +175,10 @@ static unsigned int reject(struct sk_buff *skb,
211 return NF_DROP; 175 return NF_DROP;
212} 176}
213 177
214static bool check(const char *tablename, 178static bool
215 const void *e_void, 179reject_tg_check(const char *tablename, const void *e_void,
216 const struct xt_target *target, 180 const struct xt_target *target, void *targinfo,
217 void *targinfo, 181 unsigned int hook_mask)
218 unsigned int hook_mask)
219{ 182{
220 const struct ipt_reject_info *rejinfo = targinfo; 183 const struct ipt_reject_info *rejinfo = targinfo;
221 const struct ipt_entry *e = e_void; 184 const struct ipt_entry *e = e_void;
@@ -234,27 +197,27 @@ static bool check(const char *tablename,
234 return true; 197 return true;
235} 198}
236 199
237static struct xt_target ipt_reject_reg __read_mostly = { 200static struct xt_target reject_tg_reg __read_mostly = {
238 .name = "REJECT", 201 .name = "REJECT",
239 .family = AF_INET, 202 .family = AF_INET,
240 .target = reject, 203 .target = reject_tg,
241 .targetsize = sizeof(struct ipt_reject_info), 204 .targetsize = sizeof(struct ipt_reject_info),
242 .table = "filter", 205 .table = "filter",
243 .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | 206 .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
244 (1 << NF_IP_LOCAL_OUT), 207 (1 << NF_INET_LOCAL_OUT),
245 .checkentry = check, 208 .checkentry = reject_tg_check,
246 .me = THIS_MODULE, 209 .me = THIS_MODULE,
247}; 210};
248 211
249static int __init ipt_reject_init(void) 212static int __init reject_tg_init(void)
250{ 213{
251 return xt_register_target(&ipt_reject_reg); 214 return xt_register_target(&reject_tg_reg);
252} 215}
253 216
254static void __exit ipt_reject_fini(void) 217static void __exit reject_tg_exit(void)
255{ 218{
256 xt_unregister_target(&ipt_reject_reg); 219 xt_unregister_target(&reject_tg_reg);
257} 220}
258 221
259module_init(ipt_reject_init); 222module_init(reject_tg_init);
260module_exit(ipt_reject_fini); 223module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
deleted file mode 100644
index 8988571436b8..000000000000
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ /dev/null
@@ -1,179 +0,0 @@
1/* Same. Just like SNAT, only try to make the connections
2 * between client A and server B always have the same source ip.
3 *
4 * (C) 2000 Paul `Rusty' Russell
5 * (C) 2001 Martin Josefsson
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/types.h>
12#include <linux/ip.h>
13#include <linux/timer.h>
14#include <linux/module.h>
15#include <linux/netfilter.h>
16#include <linux/netdevice.h>
17#include <linux/if.h>
18#include <linux/inetdevice.h>
19#include <net/protocol.h>
20#include <net/checksum.h>
21#include <linux/netfilter_ipv4.h>
22#include <linux/netfilter/x_tables.h>
23#include <net/netfilter/nf_nat_rule.h>
24#include <linux/netfilter_ipv4/ipt_SAME.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
28MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
29
30static bool
31same_check(const char *tablename,
32 const void *e,
33 const struct xt_target *target,
34 void *targinfo,
35 unsigned int hook_mask)
36{
37 unsigned int count, countess, rangeip, index = 0;
38 struct ipt_same_info *mr = targinfo;
39
40 mr->ipnum = 0;
41
42 if (mr->rangesize < 1) {
43 pr_debug("same_check: need at least one dest range.\n");
44 return false;
45 }
46 if (mr->rangesize > IPT_SAME_MAX_RANGE) {
47 pr_debug("same_check: too many ranges specified, maximum "
48 "is %u ranges\n", IPT_SAME_MAX_RANGE);
49 return false;
50 }
51 for (count = 0; count < mr->rangesize; count++) {
52 if (ntohl(mr->range[count].min_ip) >
53 ntohl(mr->range[count].max_ip)) {
54 pr_debug("same_check: min_ip is larger than max_ip in "
55 "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
56 NIPQUAD(mr->range[count].min_ip),
57 NIPQUAD(mr->range[count].max_ip));
58 return false;
59 }
60 if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
61 pr_debug("same_check: bad MAP_IPS.\n");
62 return false;
63 }
64 rangeip = (ntohl(mr->range[count].max_ip) -
65 ntohl(mr->range[count].min_ip) + 1);
66 mr->ipnum += rangeip;
67
68 pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip);
69 }
70 pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum);
71
72 mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
73 if (!mr->iparray) {
74 pr_debug("same_check: Couldn't allocate %Zu bytes "
75 "for %u ipaddresses!\n",
76 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
77 return false;
78 }
79 pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n",
80 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
81
82 for (count = 0; count < mr->rangesize; count++) {
83 for (countess = ntohl(mr->range[count].min_ip);
84 countess <= ntohl(mr->range[count].max_ip);
85 countess++) {
86 mr->iparray[index] = countess;
87 pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' "
88 "in index %u.\n", HIPQUAD(countess), index);
89 index++;
90 }
91 }
92 return true;
93}
94
95static void
96same_destroy(const struct xt_target *target, void *targinfo)
97{
98 struct ipt_same_info *mr = targinfo;
99
100 kfree(mr->iparray);
101
102 pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n",
103 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
104}
105
106static unsigned int
107same_target(struct sk_buff *skb,
108 const struct net_device *in,
109 const struct net_device *out,
110 unsigned int hooknum,
111 const struct xt_target *target,
112 const void *targinfo)
113{
114 struct nf_conn *ct;
115 enum ip_conntrack_info ctinfo;
116 u_int32_t tmpip, aindex;
117 __be32 new_ip;
118 const struct ipt_same_info *same = targinfo;
119 struct nf_nat_range newrange;
120 const struct nf_conntrack_tuple *t;
121
122 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
123 hooknum == NF_IP_POST_ROUTING);
124 ct = nf_ct_get(skb, &ctinfo);
125
126 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
127
128 /* Base new source on real src ip and optionally dst ip,
129 giving some hope for consistency across reboots.
130 Here we calculate the index in same->iparray which
131 holds the ipaddress we should use */
132
133 tmpip = ntohl(t->src.u3.ip);
134
135 if (!(same->info & IPT_SAME_NODST))
136 tmpip += ntohl(t->dst.u3.ip);
137 aindex = tmpip % same->ipnum;
138
139 new_ip = htonl(same->iparray[aindex]);
140
141 pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
142 "new src=%u.%u.%u.%u\n",
143 NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip));
144
145 /* Transfer from original range. */
146 newrange = ((struct nf_nat_range)
147 { same->range[0].flags, new_ip, new_ip,
148 /* FIXME: Use ports from correct range! */
149 same->range[0].min, same->range[0].max });
150
151 /* Hand modified range to generic setup. */
152 return nf_nat_setup_info(ct, &newrange, hooknum);
153}
154
155static struct xt_target same_reg __read_mostly = {
156 .name = "SAME",
157 .family = AF_INET,
158 .target = same_target,
159 .targetsize = sizeof(struct ipt_same_info),
160 .table = "nat",
161 .hooks = (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING),
162 .checkentry = same_check,
163 .destroy = same_destroy,
164 .me = THIS_MODULE,
165};
166
167static int __init ipt_same_init(void)
168{
169 return xt_register_target(&same_reg);
170}
171
172static void __exit ipt_same_fini(void)
173{
174 xt_unregister_target(&same_reg);
175}
176
177module_init(ipt_same_init);
178module_exit(ipt_same_fini);
179
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
deleted file mode 100644
index d4573baa7f27..000000000000
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ /dev/null
@@ -1,87 +0,0 @@
1/* This is a module which is used for setting the TOS field of a packet. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <net/checksum.h>
15
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter_ipv4/ipt_TOS.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
21MODULE_DESCRIPTION("iptables TOS mangling module");
22
23static unsigned int
24target(struct sk_buff *skb,
25 const struct net_device *in,
26 const struct net_device *out,
27 unsigned int hooknum,
28 const struct xt_target *target,
29 const void *targinfo)
30{
31 const struct ipt_tos_target_info *tosinfo = targinfo;
32 struct iphdr *iph = ip_hdr(skb);
33
34 if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
35 __u8 oldtos;
36 if (!skb_make_writable(skb, sizeof(struct iphdr)))
37 return NF_DROP;
38 iph = ip_hdr(skb);
39 oldtos = iph->tos;
40 iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
41 nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
42 }
43 return XT_CONTINUE;
44}
45
46static bool
47checkentry(const char *tablename,
48 const void *e_void,
49 const struct xt_target *target,
50 void *targinfo,
51 unsigned int hook_mask)
52{
53 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
54
55 if (tos != IPTOS_LOWDELAY
56 && tos != IPTOS_THROUGHPUT
57 && tos != IPTOS_RELIABILITY
58 && tos != IPTOS_MINCOST
59 && tos != IPTOS_NORMALSVC) {
60 printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
61 return false;
62 }
63 return true;
64}
65
66static struct xt_target ipt_tos_reg __read_mostly = {
67 .name = "TOS",
68 .family = AF_INET,
69 .target = target,
70 .targetsize = sizeof(struct ipt_tos_target_info),
71 .table = "mangle",
72 .checkentry = checkentry,
73 .me = THIS_MODULE,
74};
75
76static int __init ipt_tos_init(void)
77{
78 return xt_register_target(&ipt_tos_reg);
79}
80
81static void __exit ipt_tos_fini(void)
82{
83 xt_unregister_target(&ipt_tos_reg);
84}
85
86module_init(ipt_tos_init);
87module_exit(ipt_tos_fini);
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index c620a0527666..30eed65e7338 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -16,14 +16,13 @@
16#include <linux/netfilter_ipv4/ipt_TTL.h> 16#include <linux/netfilter_ipv4/ipt_TTL.h>
17 17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("IP tables TTL modification module"); 19MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21 21
22static unsigned int 22static unsigned int
23ipt_ttl_target(struct sk_buff *skb, 23ttl_tg(struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, const struct net_device *out, 24 const struct net_device *out, unsigned int hooknum,
25 unsigned int hooknum, const struct xt_target *target, 25 const struct xt_target *target, const void *targinfo)
26 const void *targinfo)
27{ 26{
28 struct iphdr *iph; 27 struct iphdr *iph;
29 const struct ipt_TTL_info *info = targinfo; 28 const struct ipt_TTL_info *info = targinfo;
@@ -54,19 +53,18 @@ ipt_ttl_target(struct sk_buff *skb,
54 } 53 }
55 54
56 if (new_ttl != iph->ttl) { 55 if (new_ttl != iph->ttl) {
57 nf_csum_replace2(&iph->check, htons(iph->ttl << 8), 56 csum_replace2(&iph->check, htons(iph->ttl << 8),
58 htons(new_ttl << 8)); 57 htons(new_ttl << 8));
59 iph->ttl = new_ttl; 58 iph->ttl = new_ttl;
60 } 59 }
61 60
62 return XT_CONTINUE; 61 return XT_CONTINUE;
63} 62}
64 63
65static bool ipt_ttl_checkentry(const char *tablename, 64static bool
66 const void *e, 65ttl_tg_check(const char *tablename, const void *e,
67 const struct xt_target *target, 66 const struct xt_target *target, void *targinfo,
68 void *targinfo, 67 unsigned int hook_mask)
69 unsigned int hook_mask)
70{ 68{
71 const struct ipt_TTL_info *info = targinfo; 69 const struct ipt_TTL_info *info = targinfo;
72 70
@@ -80,25 +78,25 @@ static bool ipt_ttl_checkentry(const char *tablename,
80 return true; 78 return true;
81} 79}
82 80
83static struct xt_target ipt_TTL __read_mostly = { 81static struct xt_target ttl_tg_reg __read_mostly = {
84 .name = "TTL", 82 .name = "TTL",
85 .family = AF_INET, 83 .family = AF_INET,
86 .target = ipt_ttl_target, 84 .target = ttl_tg,
87 .targetsize = sizeof(struct ipt_TTL_info), 85 .targetsize = sizeof(struct ipt_TTL_info),
88 .table = "mangle", 86 .table = "mangle",
89 .checkentry = ipt_ttl_checkentry, 87 .checkentry = ttl_tg_check,
90 .me = THIS_MODULE, 88 .me = THIS_MODULE,
91}; 89};
92 90
93static int __init ipt_ttl_init(void) 91static int __init ttl_tg_init(void)
94{ 92{
95 return xt_register_target(&ipt_TTL); 93 return xt_register_target(&ttl_tg_reg);
96} 94}
97 95
98static void __exit ipt_ttl_fini(void) 96static void __exit ttl_tg_exit(void)
99{ 97{
100 xt_unregister_target(&ipt_TTL); 98 xt_unregister_target(&ttl_tg_reg);
101} 99}
102 100
103module_init(ipt_ttl_init); 101module_init(ttl_tg_init);
104module_exit(ipt_ttl_fini); 102module_exit(ttl_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 212b830765a4..b192756c6d0d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -43,13 +43,14 @@
43#include <linux/netfilter.h> 43#include <linux/netfilter.h>
44#include <linux/netfilter/x_tables.h> 44#include <linux/netfilter/x_tables.h>
45#include <linux/netfilter_ipv4/ipt_ULOG.h> 45#include <linux/netfilter_ipv4/ipt_ULOG.h>
46#include <net/netfilter/nf_log.h>
46#include <net/sock.h> 47#include <net/sock.h>
47#include <linux/bitops.h> 48#include <linux/bitops.h>
48#include <asm/unaligned.h> 49#include <asm/unaligned.h>
49 50
50MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
51MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); 52MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
52MODULE_DESCRIPTION("iptables userspace logging module"); 53MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
53MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); 54MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
54 55
55#define ULOG_NL_EVENT 111 /* Harald's favorite number */ 56#define ULOG_NL_EVENT 111 /* Harald's favorite number */
@@ -279,12 +280,10 @@ alloc_failure:
279 spin_unlock_bh(&ulog_lock); 280 spin_unlock_bh(&ulog_lock);
280} 281}
281 282
282static unsigned int ipt_ulog_target(struct sk_buff *skb, 283static unsigned int
283 const struct net_device *in, 284ulog_tg(struct sk_buff *skb, const struct net_device *in,
284 const struct net_device *out, 285 const struct net_device *out, unsigned int hooknum,
285 unsigned int hooknum, 286 const struct xt_target *target, const void *targinfo)
286 const struct xt_target *target,
287 const void *targinfo)
288{ 287{
289 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; 288 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
290 289
@@ -318,11 +317,10 @@ static void ipt_logfn(unsigned int pf,
318 ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); 317 ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
319} 318}
320 319
321static bool ipt_ulog_checkentry(const char *tablename, 320static bool
322 const void *e, 321ulog_tg_check(const char *tablename, const void *e,
323 const struct xt_target *target, 322 const struct xt_target *target, void *targinfo,
324 void *targinfo, 323 unsigned int hookmask)
325 unsigned int hookmask)
326{ 324{
327 const struct ipt_ulog_info *loginfo = targinfo; 325 const struct ipt_ulog_info *loginfo = targinfo;
328 326
@@ -347,7 +345,7 @@ struct compat_ipt_ulog_info {
347 char prefix[ULOG_PREFIX_LEN]; 345 char prefix[ULOG_PREFIX_LEN];
348}; 346};
349 347
350static void compat_from_user(void *dst, void *src) 348static void ulog_tg_compat_from_user(void *dst, void *src)
351{ 349{
352 const struct compat_ipt_ulog_info *cl = src; 350 const struct compat_ipt_ulog_info *cl = src;
353 struct ipt_ulog_info l = { 351 struct ipt_ulog_info l = {
@@ -360,7 +358,7 @@ static void compat_from_user(void *dst, void *src)
360 memcpy(dst, &l, sizeof(l)); 358 memcpy(dst, &l, sizeof(l));
361} 359}
362 360
363static int compat_to_user(void __user *dst, void *src) 361static int ulog_tg_compat_to_user(void __user *dst, void *src)
364{ 362{
365 const struct ipt_ulog_info *l = src; 363 const struct ipt_ulog_info *l = src;
366 struct compat_ipt_ulog_info cl = { 364 struct compat_ipt_ulog_info cl = {
@@ -374,16 +372,16 @@ static int compat_to_user(void __user *dst, void *src)
374} 372}
375#endif /* CONFIG_COMPAT */ 373#endif /* CONFIG_COMPAT */
376 374
377static struct xt_target ipt_ulog_reg __read_mostly = { 375static struct xt_target ulog_tg_reg __read_mostly = {
378 .name = "ULOG", 376 .name = "ULOG",
379 .family = AF_INET, 377 .family = AF_INET,
380 .target = ipt_ulog_target, 378 .target = ulog_tg,
381 .targetsize = sizeof(struct ipt_ulog_info), 379 .targetsize = sizeof(struct ipt_ulog_info),
382 .checkentry = ipt_ulog_checkentry, 380 .checkentry = ulog_tg_check,
383#ifdef CONFIG_COMPAT 381#ifdef CONFIG_COMPAT
384 .compatsize = sizeof(struct compat_ipt_ulog_info), 382 .compatsize = sizeof(struct compat_ipt_ulog_info),
385 .compat_from_user = compat_from_user, 383 .compat_from_user = ulog_tg_compat_from_user,
386 .compat_to_user = compat_to_user, 384 .compat_to_user = ulog_tg_compat_to_user,
387#endif 385#endif
388 .me = THIS_MODULE, 386 .me = THIS_MODULE,
389}; 387};
@@ -394,7 +392,7 @@ static struct nf_logger ipt_ulog_logger = {
394 .me = THIS_MODULE, 392 .me = THIS_MODULE,
395}; 393};
396 394
397static int __init ipt_ulog_init(void) 395static int __init ulog_tg_init(void)
398{ 396{
399 int ret, i; 397 int ret, i;
400 398
@@ -415,9 +413,9 @@ static int __init ipt_ulog_init(void)
415 if (!nflognl) 413 if (!nflognl)
416 return -ENOMEM; 414 return -ENOMEM;
417 415
418 ret = xt_register_target(&ipt_ulog_reg); 416 ret = xt_register_target(&ulog_tg_reg);
419 if (ret < 0) { 417 if (ret < 0) {
420 sock_release(nflognl->sk_socket); 418 netlink_kernel_release(nflognl);
421 return ret; 419 return ret;
422 } 420 }
423 if (nflog) 421 if (nflog)
@@ -426,7 +424,7 @@ static int __init ipt_ulog_init(void)
426 return 0; 424 return 0;
427} 425}
428 426
429static void __exit ipt_ulog_fini(void) 427static void __exit ulog_tg_exit(void)
430{ 428{
431 ulog_buff_t *ub; 429 ulog_buff_t *ub;
432 int i; 430 int i;
@@ -435,8 +433,8 @@ static void __exit ipt_ulog_fini(void)
435 433
436 if (nflog) 434 if (nflog)
437 nf_log_unregister(&ipt_ulog_logger); 435 nf_log_unregister(&ipt_ulog_logger);
438 xt_unregister_target(&ipt_ulog_reg); 436 xt_unregister_target(&ulog_tg_reg);
439 sock_release(nflognl->sk_socket); 437 netlink_kernel_release(nflognl);
440 438
441 /* remove pending timers and free allocated skb's */ 439 /* remove pending timers and free allocated skb's */
442 for (i = 0; i < ULOG_MAXNLGROUPS; i++) { 440 for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
@@ -453,5 +451,5 @@ static void __exit ipt_ulog_fini(void)
453 } 451 }
454} 452}
455 453
456module_init(ipt_ulog_init); 454module_init(ulog_tg_init);
457module_exit(ipt_ulog_fini); 455module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 59f01f7ba6b4..49587a497229 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -2,6 +2,7 @@
2 * iptables module to match inet_addr_type() of an ip. 2 * iptables module to match inet_addr_type() of an ip.
3 * 3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net> 4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
@@ -20,47 +21,119 @@
20 21
21MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
23MODULE_DESCRIPTION("iptables addrtype match"); 24MODULE_DESCRIPTION("Xtables: address type match for IPv4");
24 25
25static inline bool match_type(__be32 addr, u_int16_t mask) 26static inline bool match_type(const struct net_device *dev, __be32 addr,
27 u_int16_t mask)
26{ 28{
27 return !!(mask & (1 << inet_addr_type(addr))); 29 return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr)));
28} 30}
29 31
30static bool match(const struct sk_buff *skb, 32static bool
31 const struct net_device *in, const struct net_device *out, 33addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in,
32 const struct xt_match *match, const void *matchinfo, 34 const struct net_device *out, const struct xt_match *match,
33 int offset, unsigned int protoff, bool *hotdrop) 35 const void *matchinfo, int offset, unsigned int protoff,
36 bool *hotdrop)
34{ 37{
35 const struct ipt_addrtype_info *info = matchinfo; 38 const struct ipt_addrtype_info *info = matchinfo;
36 const struct iphdr *iph = ip_hdr(skb); 39 const struct iphdr *iph = ip_hdr(skb);
37 bool ret = true; 40 bool ret = true;
38 41
39 if (info->source) 42 if (info->source)
40 ret &= match_type(iph->saddr, info->source)^info->invert_source; 43 ret &= match_type(NULL, iph->saddr, info->source) ^
44 info->invert_source;
41 if (info->dest) 45 if (info->dest)
42 ret &= match_type(iph->daddr, info->dest)^info->invert_dest; 46 ret &= match_type(NULL, iph->daddr, info->dest) ^
47 info->invert_dest;
43 48
44 return ret; 49 return ret;
45} 50}
46 51
47static struct xt_match addrtype_match __read_mostly = { 52static bool
48 .name = "addrtype", 53addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in,
49 .family = AF_INET, 54 const struct net_device *out, const struct xt_match *match,
50 .match = match, 55 const void *matchinfo, int offset, unsigned int protoff,
51 .matchsize = sizeof(struct ipt_addrtype_info), 56 bool *hotdrop)
52 .me = THIS_MODULE 57{
58 const struct ipt_addrtype_info_v1 *info = matchinfo;
59 const struct iphdr *iph = ip_hdr(skb);
60 const struct net_device *dev = NULL;
61 bool ret = true;
62
63 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
64 dev = in;
65 else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
66 dev = out;
67
68 if (info->source)
69 ret &= match_type(dev, iph->saddr, info->source) ^
70 (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
71 if (ret && info->dest)
72 ret &= match_type(dev, iph->daddr, info->dest) ^
73 (info->flags & IPT_ADDRTYPE_INVERT_DEST);
74 return ret;
75}
76
77static bool
78addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
79 const struct xt_match *match, void *matchinfo,
80 unsigned int hook_mask)
81{
82 struct ipt_addrtype_info_v1 *info = matchinfo;
83
84 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
85 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
86 printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
87 "interface limitation cannot be selected\n");
88 return false;
89 }
90
91 if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) &&
92 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
93 printk(KERN_ERR "ipt_addrtype: output interface limitation "
94 "not valid in PRE_ROUTING and INPUT\n");
95 return false;
96 }
97
98 if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) &&
99 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
100 printk(KERN_ERR "ipt_addrtype: input interface limitation "
101 "not valid in POST_ROUTING and OUTPUT\n");
102 return false;
103 }
104
105 return true;
106}
107
108static struct xt_match addrtype_mt_reg[] __read_mostly = {
109 {
110 .name = "addrtype",
111 .family = AF_INET,
112 .match = addrtype_mt_v0,
113 .matchsize = sizeof(struct ipt_addrtype_info),
114 .me = THIS_MODULE
115 },
116 {
117 .name = "addrtype",
118 .family = AF_INET,
119 .revision = 1,
120 .match = addrtype_mt_v1,
121 .checkentry = addrtype_mt_checkentry_v1,
122 .matchsize = sizeof(struct ipt_addrtype_info_v1),
123 .me = THIS_MODULE
124 }
53}; 125};
54 126
55static int __init ipt_addrtype_init(void) 127static int __init addrtype_mt_init(void)
56{ 128{
57 return xt_register_match(&addrtype_match); 129 return xt_register_matches(addrtype_mt_reg,
130 ARRAY_SIZE(addrtype_mt_reg));
58} 131}
59 132
60static void __exit ipt_addrtype_fini(void) 133static void __exit addrtype_mt_exit(void)
61{ 134{
62 xt_unregister_match(&addrtype_match); 135 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
63} 136}
64 137
65module_init(ipt_addrtype_init); 138module_init(addrtype_mt_init);
66module_exit(ipt_addrtype_fini); 139module_exit(addrtype_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 61b017fd743c..e977989629c7 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -16,7 +16,7 @@
16 16
17MODULE_LICENSE("GPL"); 17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); 18MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
19MODULE_DESCRIPTION("iptables AH SPI match module"); 19MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
20 20
21#ifdef DEBUG_CONNTRACK 21#ifdef DEBUG_CONNTRACK
22#define duprintf(format, args...) printk(format , ## args) 22#define duprintf(format, args...) printk(format , ## args)
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
37} 37}
38 38
39static bool 39static bool
40match(const struct sk_buff *skb, 40ah_mt(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 41 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 42 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
43 const struct xt_match *match,
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 43{
49 struct ip_auth_hdr _ahdr; 44 struct ip_auth_hdr _ahdr;
50 const struct ip_auth_hdr *ah; 45 const struct ip_auth_hdr *ah;
@@ -72,11 +67,9 @@ match(const struct sk_buff *skb,
72 67
73/* Called when user tries to insert an entry of this type. */ 68/* Called when user tries to insert an entry of this type. */
74static bool 69static bool
75checkentry(const char *tablename, 70ah_mt_check(const char *tablename, const void *ip_void,
76 const void *ip_void, 71 const struct xt_match *match, void *matchinfo,
77 const struct xt_match *match, 72 unsigned int hook_mask)
78 void *matchinfo,
79 unsigned int hook_mask)
80{ 73{
81 const struct ipt_ah *ahinfo = matchinfo; 74 const struct ipt_ah *ahinfo = matchinfo;
82 75
@@ -88,25 +81,25 @@ checkentry(const char *tablename,
88 return true; 81 return true;
89} 82}
90 83
91static struct xt_match ah_match __read_mostly = { 84static struct xt_match ah_mt_reg __read_mostly = {
92 .name = "ah", 85 .name = "ah",
93 .family = AF_INET, 86 .family = AF_INET,
94 .match = match, 87 .match = ah_mt,
95 .matchsize = sizeof(struct ipt_ah), 88 .matchsize = sizeof(struct ipt_ah),
96 .proto = IPPROTO_AH, 89 .proto = IPPROTO_AH,
97 .checkentry = checkentry, 90 .checkentry = ah_mt_check,
98 .me = THIS_MODULE, 91 .me = THIS_MODULE,
99}; 92};
100 93
101static int __init ipt_ah_init(void) 94static int __init ah_mt_init(void)
102{ 95{
103 return xt_register_match(&ah_match); 96 return xt_register_match(&ah_mt_reg);
104} 97}
105 98
106static void __exit ipt_ah_fini(void) 99static void __exit ah_mt_exit(void)
107{ 100{
108 xt_unregister_match(&ah_match); 101 xt_unregister_match(&ah_mt_reg);
109} 102}
110 103
111module_init(ipt_ah_init); 104module_init(ah_mt_init);
112module_exit(ipt_ah_fini); 105module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index d6925c674069..749de8284ce5 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -19,7 +19,7 @@
19#include <linux/netfilter_ipv4/ipt_ecn.h> 19#include <linux/netfilter_ipv4/ipt_ecn.h>
20 20
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("iptables ECN matching module"); 22MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25static inline bool match_ip(const struct sk_buff *skb, 25static inline bool match_ip(const struct sk_buff *skb,
@@ -67,10 +67,10 @@ static inline bool match_tcp(const struct sk_buff *skb,
67 return true; 67 return true;
68} 68}
69 69
70static bool match(const struct sk_buff *skb, 70static bool
71 const struct net_device *in, const struct net_device *out, 71ecn_mt(const struct sk_buff *skb, const struct net_device *in,
72 const struct xt_match *match, const void *matchinfo, 72 const struct net_device *out, const struct xt_match *match,
73 int offset, unsigned int protoff, bool *hotdrop) 73 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
74{ 74{
75 const struct ipt_ecn_info *info = matchinfo; 75 const struct ipt_ecn_info *info = matchinfo;
76 76
@@ -88,9 +88,10 @@ static bool match(const struct sk_buff *skb,
88 return true; 88 return true;
89} 89}
90 90
91static bool checkentry(const char *tablename, const void *ip_void, 91static bool
92 const struct xt_match *match, 92ecn_mt_check(const char *tablename, const void *ip_void,
93 void *matchinfo, unsigned int hook_mask) 93 const struct xt_match *match, void *matchinfo,
94 unsigned int hook_mask)
94{ 95{
95 const struct ipt_ecn_info *info = matchinfo; 96 const struct ipt_ecn_info *info = matchinfo;
96 const struct ipt_ip *ip = ip_void; 97 const struct ipt_ip *ip = ip_void;
@@ -111,24 +112,24 @@ static bool checkentry(const char *tablename, const void *ip_void,
111 return true; 112 return true;
112} 113}
113 114
114static struct xt_match ecn_match __read_mostly = { 115static struct xt_match ecn_mt_reg __read_mostly = {
115 .name = "ecn", 116 .name = "ecn",
116 .family = AF_INET, 117 .family = AF_INET,
117 .match = match, 118 .match = ecn_mt,
118 .matchsize = sizeof(struct ipt_ecn_info), 119 .matchsize = sizeof(struct ipt_ecn_info),
119 .checkentry = checkentry, 120 .checkentry = ecn_mt_check,
120 .me = THIS_MODULE, 121 .me = THIS_MODULE,
121}; 122};
122 123
123static int __init ipt_ecn_init(void) 124static int __init ecn_mt_init(void)
124{ 125{
125 return xt_register_match(&ecn_match); 126 return xt_register_match(&ecn_mt_reg);
126} 127}
127 128
128static void __exit ipt_ecn_fini(void) 129static void __exit ecn_mt_exit(void)
129{ 130{
130 xt_unregister_match(&ecn_match); 131 xt_unregister_match(&ecn_mt_reg);
131} 132}
132 133
133module_init(ipt_ecn_init); 134module_init(ecn_mt_init);
134module_exit(ipt_ecn_fini); 135module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
deleted file mode 100644
index 0106dc955a69..000000000000
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/*
2 * iptables module to match IP address ranges
3 *
4 * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter_ipv4/ipt_iprange.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
18MODULE_DESCRIPTION("iptables arbitrary IP range match module");
19
20static bool
21match(const struct sk_buff *skb,
22 const struct net_device *in,
23 const struct net_device *out,
24 const struct xt_match *match,
25 const void *matchinfo,
26 int offset, unsigned int protoff, bool *hotdrop)
27{
28 const struct ipt_iprange_info *info = matchinfo;
29 const struct iphdr *iph = ip_hdr(skb);
30
31 if (info->flags & IPRANGE_SRC) {
32 if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
33 || ntohl(iph->saddr) > ntohl(info->src.max_ip))
34 ^ !!(info->flags & IPRANGE_SRC_INV)) {
35 pr_debug("src IP %u.%u.%u.%u NOT in range %s"
36 "%u.%u.%u.%u-%u.%u.%u.%u\n",
37 NIPQUAD(iph->saddr),
38 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
39 NIPQUAD(info->src.min_ip),
40 NIPQUAD(info->src.max_ip));
41 return false;
42 }
43 }
44 if (info->flags & IPRANGE_DST) {
45 if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
46 || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
47 ^ !!(info->flags & IPRANGE_DST_INV)) {
48 pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
49 "%u.%u.%u.%u-%u.%u.%u.%u\n",
50 NIPQUAD(iph->daddr),
51 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
52 NIPQUAD(info->dst.min_ip),
53 NIPQUAD(info->dst.max_ip));
54 return false;
55 }
56 }
57 return true;
58}
59
60static struct xt_match iprange_match __read_mostly = {
61 .name = "iprange",
62 .family = AF_INET,
63 .match = match,
64 .matchsize = sizeof(struct ipt_iprange_info),
65 .me = THIS_MODULE
66};
67
68static int __init ipt_iprange_init(void)
69{
70 return xt_register_match(&iprange_match);
71}
72
73static void __exit ipt_iprange_fini(void)
74{
75 xt_unregister_match(&iprange_match);
76}
77
78module_init(ipt_iprange_init);
79module_exit(ipt_iprange_fini);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
deleted file mode 100644
index b14e77da7a33..000000000000
--- a/net/ipv4/netfilter/ipt_owner.c
+++ /dev/null
@@ -1,92 +0,0 @@
1/* Kernel module to match various things tied to sockets associated with
2 locally generated outgoing packets. */
3
4/* (C) 2000 Marc Boucher <marc@mbsi.ca>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/file.h>
14#include <linux/rcupdate.h>
15#include <net/sock.h>
16
17#include <linux/netfilter_ipv4/ipt_owner.h>
18#include <linux/netfilter/x_tables.h>
19
20MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
22MODULE_DESCRIPTION("iptables owner match");
23
24static bool
25match(const struct sk_buff *skb,
26 const struct net_device *in,
27 const struct net_device *out,
28 const struct xt_match *match,
29 const void *matchinfo,
30 int offset,
31 unsigned int protoff,
32 bool *hotdrop)
33{
34 const struct ipt_owner_info *info = matchinfo;
35
36 if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
37 return false;
38
39 if(info->match & IPT_OWNER_UID) {
40 if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
41 !!(info->invert & IPT_OWNER_UID))
42 return false;
43 }
44
45 if(info->match & IPT_OWNER_GID) {
46 if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
47 !!(info->invert & IPT_OWNER_GID))
48 return false;
49 }
50
51 return true;
52}
53
54static bool
55checkentry(const char *tablename,
56 const void *ip,
57 const struct xt_match *match,
58 void *matchinfo,
59 unsigned int hook_mask)
60{
61 const struct ipt_owner_info *info = matchinfo;
62
63 if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
64 printk("ipt_owner: pid, sid and command matching "
65 "not supported anymore\n");
66 return false;
67 }
68 return true;
69}
70
71static struct xt_match owner_match __read_mostly = {
72 .name = "owner",
73 .family = AF_INET,
74 .match = match,
75 .matchsize = sizeof(struct ipt_owner_info),
76 .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING),
77 .checkentry = checkentry,
78 .me = THIS_MODULE,
79};
80
81static int __init ipt_owner_init(void)
82{
83 return xt_register_match(&owner_match);
84}
85
86static void __exit ipt_owner_fini(void)
87{
88 xt_unregister_match(&owner_match);
89}
90
91module_init(ipt_owner_init);
92module_exit(ipt_owner_fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 11d39fb5f38b..68cbe3ca01ce 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -30,7 +30,7 @@
30#include <linux/netfilter_ipv4/ipt_recent.h> 30#include <linux/netfilter_ipv4/ipt_recent.h>
31 31
32MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 32MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
33MODULE_DESCRIPTION("IP tables recently seen matching module"); 33MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
34MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
35 35
36static unsigned int ip_list_tot = 100; 36static unsigned int ip_list_tot = 100;
@@ -170,10 +170,10 @@ static void recent_table_flush(struct recent_table *t)
170} 170}
171 171
172static bool 172static bool
173ipt_recent_match(const struct sk_buff *skb, 173recent_mt(const struct sk_buff *skb, const struct net_device *in,
174 const struct net_device *in, const struct net_device *out, 174 const struct net_device *out, const struct xt_match *match,
175 const struct xt_match *match, const void *matchinfo, 175 const void *matchinfo, int offset, unsigned int protoff,
176 int offset, unsigned int protoff, bool *hotdrop) 176 bool *hotdrop)
177{ 177{
178 const struct ipt_recent_info *info = matchinfo; 178 const struct ipt_recent_info *info = matchinfo;
179 struct recent_table *t; 179 struct recent_table *t;
@@ -212,11 +212,11 @@ ipt_recent_match(const struct sk_buff *skb,
212 recent_entry_remove(t, e); 212 recent_entry_remove(t, e);
213 ret = !ret; 213 ret = !ret;
214 } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { 214 } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) {
215 unsigned long t = jiffies - info->seconds * HZ; 215 unsigned long time = jiffies - info->seconds * HZ;
216 unsigned int i, hits = 0; 216 unsigned int i, hits = 0;
217 217
218 for (i = 0; i < e->nstamps; i++) { 218 for (i = 0; i < e->nstamps; i++) {
219 if (info->seconds && time_after(t, e->stamps[i])) 219 if (info->seconds && time_after(time, e->stamps[i]))
220 continue; 220 continue;
221 if (++hits >= info->hit_count) { 221 if (++hits >= info->hit_count) {
222 ret = !ret; 222 ret = !ret;
@@ -236,9 +236,9 @@ out:
236} 236}
237 237
238static bool 238static bool
239ipt_recent_checkentry(const char *tablename, const void *ip, 239recent_mt_check(const char *tablename, const void *ip,
240 const struct xt_match *match, void *matchinfo, 240 const struct xt_match *match, void *matchinfo,
241 unsigned int hook_mask) 241 unsigned int hook_mask)
242{ 242{
243 const struct ipt_recent_info *info = matchinfo; 243 const struct ipt_recent_info *info = matchinfo;
244 struct recent_table *t; 244 struct recent_table *t;
@@ -293,8 +293,7 @@ out:
293 return ret; 293 return ret;
294} 294}
295 295
296static void 296static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
297ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
298{ 297{
299 const struct ipt_recent_info *info = matchinfo; 298 const struct ipt_recent_info *info = matchinfo;
300 struct recent_table *t; 299 struct recent_table *t;
@@ -321,6 +320,7 @@ struct recent_iter_state {
321}; 320};
322 321
323static void *recent_seq_start(struct seq_file *seq, loff_t *pos) 322static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
323 __acquires(recent_lock)
324{ 324{
325 struct recent_iter_state *st = seq->private; 325 struct recent_iter_state *st = seq->private;
326 const struct recent_table *t = st->table; 326 const struct recent_table *t = st->table;
@@ -353,6 +353,7 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
353} 353}
354 354
355static void recent_seq_stop(struct seq_file *s, void *v) 355static void recent_seq_stop(struct seq_file *s, void *v)
356 __releases(recent_lock)
356{ 357{
357 spin_unlock_bh(&recent_lock); 358 spin_unlock_bh(&recent_lock);
358} 359}
@@ -455,17 +456,17 @@ static const struct file_operations recent_fops = {
455}; 456};
456#endif /* CONFIG_PROC_FS */ 457#endif /* CONFIG_PROC_FS */
457 458
458static struct xt_match recent_match __read_mostly = { 459static struct xt_match recent_mt_reg __read_mostly = {
459 .name = "recent", 460 .name = "recent",
460 .family = AF_INET, 461 .family = AF_INET,
461 .match = ipt_recent_match, 462 .match = recent_mt,
462 .matchsize = sizeof(struct ipt_recent_info), 463 .matchsize = sizeof(struct ipt_recent_info),
463 .checkentry = ipt_recent_checkentry, 464 .checkentry = recent_mt_check,
464 .destroy = ipt_recent_destroy, 465 .destroy = recent_mt_destroy,
465 .me = THIS_MODULE, 466 .me = THIS_MODULE,
466}; 467};
467 468
468static int __init ipt_recent_init(void) 469static int __init recent_mt_init(void)
469{ 470{
470 int err; 471 int err;
471 472
@@ -473,27 +474,27 @@ static int __init ipt_recent_init(void)
473 return -EINVAL; 474 return -EINVAL;
474 ip_list_hash_size = 1 << fls(ip_list_tot); 475 ip_list_hash_size = 1 << fls(ip_list_tot);
475 476
476 err = xt_register_match(&recent_match); 477 err = xt_register_match(&recent_mt_reg);
477#ifdef CONFIG_PROC_FS 478#ifdef CONFIG_PROC_FS
478 if (err) 479 if (err)
479 return err; 480 return err;
480 proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); 481 proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
481 if (proc_dir == NULL) { 482 if (proc_dir == NULL) {
482 xt_unregister_match(&recent_match); 483 xt_unregister_match(&recent_mt_reg);
483 err = -ENOMEM; 484 err = -ENOMEM;
484 } 485 }
485#endif 486#endif
486 return err; 487 return err;
487} 488}
488 489
489static void __exit ipt_recent_exit(void) 490static void __exit recent_mt_exit(void)
490{ 491{
491 BUG_ON(!list_empty(&tables)); 492 BUG_ON(!list_empty(&tables));
492 xt_unregister_match(&recent_match); 493 xt_unregister_match(&recent_mt_reg);
493#ifdef CONFIG_PROC_FS 494#ifdef CONFIG_PROC_FS
494 remove_proc_entry("ipt_recent", init_net.proc_net); 495 remove_proc_entry("ipt_recent", init_net.proc_net);
495#endif 496#endif
496} 497}
497 498
498module_init(ipt_recent_init); 499module_init(recent_mt_init);
499module_exit(ipt_recent_exit); 500module_exit(recent_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
deleted file mode 100644
index e740441c973d..000000000000
--- a/net/ipv4/netfilter/ipt_tos.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/* Kernel module to match TOS values. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/ip.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14
15#include <linux/netfilter_ipv4/ipt_tos.h>
16#include <linux/netfilter/x_tables.h>
17
18MODULE_LICENSE("GPL");
19MODULE_DESCRIPTION("iptables TOS match module");
20
21static bool
22match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const struct xt_match *match,
26 const void *matchinfo,
27 int offset,
28 unsigned int protoff,
29 bool *hotdrop)
30{
31 const struct ipt_tos_info *info = matchinfo;
32
33 return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
34}
35
36static struct xt_match tos_match __read_mostly = {
37 .name = "tos",
38 .family = AF_INET,
39 .match = match,
40 .matchsize = sizeof(struct ipt_tos_info),
41 .me = THIS_MODULE,
42};
43
44static int __init ipt_multiport_init(void)
45{
46 return xt_register_match(&tos_match);
47}
48
49static void __exit ipt_multiport_fini(void)
50{
51 xt_unregister_match(&tos_match);
52}
53
54module_init(ipt_multiport_init);
55module_exit(ipt_multiport_fini);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index a439900a4ba5..e0b8caeb710c 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -15,13 +15,13 @@
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16 16
17MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 17MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
18MODULE_DESCRIPTION("IP tables TTL matching module"); 18MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20 20
21static bool match(const struct sk_buff *skb, 21static bool
22 const struct net_device *in, const struct net_device *out, 22ttl_mt(const struct sk_buff *skb, const struct net_device *in,
23 const struct xt_match *match, const void *matchinfo, 23 const struct net_device *out, const struct xt_match *match,
24 int offset, unsigned int protoff, bool *hotdrop) 24 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
25{ 25{
26 const struct ipt_ttl_info *info = matchinfo; 26 const struct ipt_ttl_info *info = matchinfo;
27 const u8 ttl = ip_hdr(skb)->ttl; 27 const u8 ttl = ip_hdr(skb)->ttl;
@@ -44,23 +44,23 @@ static bool match(const struct sk_buff *skb,
44 return false; 44 return false;
45} 45}
46 46
47static struct xt_match ttl_match __read_mostly = { 47static struct xt_match ttl_mt_reg __read_mostly = {
48 .name = "ttl", 48 .name = "ttl",
49 .family = AF_INET, 49 .family = AF_INET,
50 .match = match, 50 .match = ttl_mt,
51 .matchsize = sizeof(struct ipt_ttl_info), 51 .matchsize = sizeof(struct ipt_ttl_info),
52 .me = THIS_MODULE, 52 .me = THIS_MODULE,
53}; 53};
54 54
55static int __init ipt_ttl_init(void) 55static int __init ttl_mt_init(void)
56{ 56{
57 return xt_register_match(&ttl_match); 57 return xt_register_match(&ttl_mt_reg);
58} 58}
59 59
60static void __exit ipt_ttl_fini(void) 60static void __exit ttl_mt_exit(void)
61{ 61{
62 xt_unregister_match(&ttl_match); 62 xt_unregister_match(&ttl_mt_reg);
63} 63}
64 64
65module_init(ipt_ttl_init); 65module_init(ttl_mt_init);
66module_exit(ipt_ttl_fini); 66module_exit(ttl_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index ba3262c60437..69f3d7e6e96f 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -19,28 +19,30 @@ MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 19MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
20MODULE_DESCRIPTION("iptables filter table"); 20MODULE_DESCRIPTION("iptables filter table");
21 21
22#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) 22#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
23 (1 << NF_INET_FORWARD) | \
24 (1 << NF_INET_LOCAL_OUT))
23 25
24static struct 26static struct
25{ 27{
26 struct ipt_replace repl; 28 struct ipt_replace repl;
27 struct ipt_standard entries[3]; 29 struct ipt_standard entries[3];
28 struct ipt_error term; 30 struct ipt_error term;
29} initial_table __initdata = { 31} initial_table __net_initdata = {
30 .repl = { 32 .repl = {
31 .name = "filter", 33 .name = "filter",
32 .valid_hooks = FILTER_VALID_HOOKS, 34 .valid_hooks = FILTER_VALID_HOOKS,
33 .num_entries = 4, 35 .num_entries = 4,
34 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), 36 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
35 .hook_entry = { 37 .hook_entry = {
36 [NF_IP_LOCAL_IN] = 0, 38 [NF_INET_LOCAL_IN] = 0,
37 [NF_IP_FORWARD] = sizeof(struct ipt_standard), 39 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
38 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, 40 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
39 }, 41 },
40 .underflow = { 42 .underflow = {
41 [NF_IP_LOCAL_IN] = 0, 43 [NF_INET_LOCAL_IN] = 0,
42 [NF_IP_FORWARD] = sizeof(struct ipt_standard), 44 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
43 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, 45 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
44 }, 46 },
45 }, 47 },
46 .entries = { 48 .entries = {
@@ -67,7 +69,7 @@ ipt_hook(unsigned int hook,
67 const struct net_device *out, 69 const struct net_device *out,
68 int (*okfn)(struct sk_buff *)) 70 int (*okfn)(struct sk_buff *))
69{ 71{
70 return ipt_do_table(skb, hook, in, out, &packet_filter); 72 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
71} 73}
72 74
73static unsigned int 75static unsigned int
@@ -86,29 +88,29 @@ ipt_local_out_hook(unsigned int hook,
86 return NF_ACCEPT; 88 return NF_ACCEPT;
87 } 89 }
88 90
89 return ipt_do_table(skb, hook, in, out, &packet_filter); 91 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
90} 92}
91 93
92static struct nf_hook_ops ipt_ops[] = { 94static struct nf_hook_ops ipt_ops[] __read_mostly = {
93 { 95 {
94 .hook = ipt_hook, 96 .hook = ipt_hook,
95 .owner = THIS_MODULE, 97 .owner = THIS_MODULE,
96 .pf = PF_INET, 98 .pf = PF_INET,
97 .hooknum = NF_IP_LOCAL_IN, 99 .hooknum = NF_INET_LOCAL_IN,
98 .priority = NF_IP_PRI_FILTER, 100 .priority = NF_IP_PRI_FILTER,
99 }, 101 },
100 { 102 {
101 .hook = ipt_hook, 103 .hook = ipt_hook,
102 .owner = THIS_MODULE, 104 .owner = THIS_MODULE,
103 .pf = PF_INET, 105 .pf = PF_INET,
104 .hooknum = NF_IP_FORWARD, 106 .hooknum = NF_INET_FORWARD,
105 .priority = NF_IP_PRI_FILTER, 107 .priority = NF_IP_PRI_FILTER,
106 }, 108 },
107 { 109 {
108 .hook = ipt_local_out_hook, 110 .hook = ipt_local_out_hook,
109 .owner = THIS_MODULE, 111 .owner = THIS_MODULE,
110 .pf = PF_INET, 112 .pf = PF_INET,
111 .hooknum = NF_IP_LOCAL_OUT, 113 .hooknum = NF_INET_LOCAL_OUT,
112 .priority = NF_IP_PRI_FILTER, 114 .priority = NF_IP_PRI_FILTER,
113 }, 115 },
114}; 116};
@@ -117,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] = {
117static int forward = NF_ACCEPT; 119static int forward = NF_ACCEPT;
118module_param(forward, bool, 0000); 120module_param(forward, bool, 0000);
119 121
122static int __net_init iptable_filter_net_init(struct net *net)
123{
124 /* Register table */
125 net->ipv4.iptable_filter =
126 ipt_register_table(net, &packet_filter, &initial_table.repl);
127 if (IS_ERR(net->ipv4.iptable_filter))
128 return PTR_ERR(net->ipv4.iptable_filter);
129 return 0;
130}
131
132static void __net_exit iptable_filter_net_exit(struct net *net)
133{
134 ipt_unregister_table(net->ipv4.iptable_filter);
135}
136
137static struct pernet_operations iptable_filter_net_ops = {
138 .init = iptable_filter_net_init,
139 .exit = iptable_filter_net_exit,
140};
141
120static int __init iptable_filter_init(void) 142static int __init iptable_filter_init(void)
121{ 143{
122 int ret; 144 int ret;
@@ -129,8 +151,7 @@ static int __init iptable_filter_init(void)
129 /* Entry 1 is the FORWARD hook */ 151 /* Entry 1 is the FORWARD hook */
130 initial_table.entries[1].target.verdict = -forward - 1; 152 initial_table.entries[1].target.verdict = -forward - 1;
131 153
132 /* Register table */ 154 ret = register_pernet_subsys(&iptable_filter_net_ops);
133 ret = ipt_register_table(&packet_filter, &initial_table.repl);
134 if (ret < 0) 155 if (ret < 0)
135 return ret; 156 return ret;
136 157
@@ -142,14 +163,14 @@ static int __init iptable_filter_init(void)
142 return ret; 163 return ret;
143 164
144 cleanup_table: 165 cleanup_table:
145 ipt_unregister_table(&packet_filter); 166 unregister_pernet_subsys(&iptable_filter_net_ops);
146 return ret; 167 return ret;
147} 168}
148 169
149static void __exit iptable_filter_fini(void) 170static void __exit iptable_filter_fini(void)
150{ 171{
151 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 172 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
152 ipt_unregister_table(&packet_filter); 173 unregister_pernet_subsys(&iptable_filter_net_ops);
153} 174}
154 175
155module_init(iptable_filter_init); 176module_init(iptable_filter_init);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index b4360a69d5ca..c55a210853a7 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -21,11 +21,11 @@ MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 21MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
22MODULE_DESCRIPTION("iptables mangle table"); 22MODULE_DESCRIPTION("iptables mangle table");
23 23
24#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \ 24#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
25 (1 << NF_IP_LOCAL_IN) | \ 25 (1 << NF_INET_LOCAL_IN) | \
26 (1 << NF_IP_FORWARD) | \ 26 (1 << NF_INET_FORWARD) | \
27 (1 << NF_IP_LOCAL_OUT) | \ 27 (1 << NF_INET_LOCAL_OUT) | \
28 (1 << NF_IP_POST_ROUTING)) 28 (1 << NF_INET_POST_ROUTING))
29 29
30/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ 30/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
31static struct 31static struct
@@ -33,25 +33,25 @@ static struct
33 struct ipt_replace repl; 33 struct ipt_replace repl;
34 struct ipt_standard entries[5]; 34 struct ipt_standard entries[5];
35 struct ipt_error term; 35 struct ipt_error term;
36} initial_table __initdata = { 36} initial_table __net_initdata = {
37 .repl = { 37 .repl = {
38 .name = "mangle", 38 .name = "mangle",
39 .valid_hooks = MANGLE_VALID_HOOKS, 39 .valid_hooks = MANGLE_VALID_HOOKS,
40 .num_entries = 6, 40 .num_entries = 6,
41 .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), 41 .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
42 .hook_entry = { 42 .hook_entry = {
43 [NF_IP_PRE_ROUTING] = 0, 43 [NF_INET_PRE_ROUTING] = 0,
44 [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), 44 [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
45 [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, 45 [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
46 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, 46 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
47 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4, 47 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
48 }, 48 },
49 .underflow = { 49 .underflow = {
50 [NF_IP_PRE_ROUTING] = 0, 50 [NF_INET_PRE_ROUTING] = 0,
51 [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), 51 [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
52 [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, 52 [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
53 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, 53 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
54 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4, 54 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
55 }, 55 },
56 }, 56 },
57 .entries = { 57 .entries = {
@@ -80,7 +80,7 @@ ipt_route_hook(unsigned int hook,
80 const struct net_device *out, 80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *)) 81 int (*okfn)(struct sk_buff *))
82{ 82{
83 return ipt_do_table(skb, hook, in, out, &packet_mangler); 83 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
84} 84}
85 85
86static unsigned int 86static unsigned int
@@ -112,7 +112,7 @@ ipt_local_hook(unsigned int hook,
112 daddr = iph->daddr; 112 daddr = iph->daddr;
113 tos = iph->tos; 113 tos = iph->tos;
114 114
115 ret = ipt_do_table(skb, hook, in, out, &packet_mangler); 115 ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
116 /* Reroute for ANY change. */ 116 /* Reroute for ANY change. */
117 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { 117 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
118 iph = ip_hdr(skb); 118 iph = ip_hdr(skb);
@@ -128,50 +128,69 @@ ipt_local_hook(unsigned int hook,
128 return ret; 128 return ret;
129} 129}
130 130
131static struct nf_hook_ops ipt_ops[] = { 131static struct nf_hook_ops ipt_ops[] __read_mostly = {
132 { 132 {
133 .hook = ipt_route_hook, 133 .hook = ipt_route_hook,
134 .owner = THIS_MODULE, 134 .owner = THIS_MODULE,
135 .pf = PF_INET, 135 .pf = PF_INET,
136 .hooknum = NF_IP_PRE_ROUTING, 136 .hooknum = NF_INET_PRE_ROUTING,
137 .priority = NF_IP_PRI_MANGLE, 137 .priority = NF_IP_PRI_MANGLE,
138 }, 138 },
139 { 139 {
140 .hook = ipt_route_hook, 140 .hook = ipt_route_hook,
141 .owner = THIS_MODULE, 141 .owner = THIS_MODULE,
142 .pf = PF_INET, 142 .pf = PF_INET,
143 .hooknum = NF_IP_LOCAL_IN, 143 .hooknum = NF_INET_LOCAL_IN,
144 .priority = NF_IP_PRI_MANGLE, 144 .priority = NF_IP_PRI_MANGLE,
145 }, 145 },
146 { 146 {
147 .hook = ipt_route_hook, 147 .hook = ipt_route_hook,
148 .owner = THIS_MODULE, 148 .owner = THIS_MODULE,
149 .pf = PF_INET, 149 .pf = PF_INET,
150 .hooknum = NF_IP_FORWARD, 150 .hooknum = NF_INET_FORWARD,
151 .priority = NF_IP_PRI_MANGLE, 151 .priority = NF_IP_PRI_MANGLE,
152 }, 152 },
153 { 153 {
154 .hook = ipt_local_hook, 154 .hook = ipt_local_hook,
155 .owner = THIS_MODULE, 155 .owner = THIS_MODULE,
156 .pf = PF_INET, 156 .pf = PF_INET,
157 .hooknum = NF_IP_LOCAL_OUT, 157 .hooknum = NF_INET_LOCAL_OUT,
158 .priority = NF_IP_PRI_MANGLE, 158 .priority = NF_IP_PRI_MANGLE,
159 }, 159 },
160 { 160 {
161 .hook = ipt_route_hook, 161 .hook = ipt_route_hook,
162 .owner = THIS_MODULE, 162 .owner = THIS_MODULE,
163 .pf = PF_INET, 163 .pf = PF_INET,
164 .hooknum = NF_IP_POST_ROUTING, 164 .hooknum = NF_INET_POST_ROUTING,
165 .priority = NF_IP_PRI_MANGLE, 165 .priority = NF_IP_PRI_MANGLE,
166 }, 166 },
167}; 167};
168 168
169static int __net_init iptable_mangle_net_init(struct net *net)
170{
171 /* Register table */
172 net->ipv4.iptable_mangle =
173 ipt_register_table(net, &packet_mangler, &initial_table.repl);
174 if (IS_ERR(net->ipv4.iptable_mangle))
175 return PTR_ERR(net->ipv4.iptable_mangle);
176 return 0;
177}
178
179static void __net_exit iptable_mangle_net_exit(struct net *net)
180{
181 ipt_unregister_table(net->ipv4.iptable_mangle);
182}
183
184static struct pernet_operations iptable_mangle_net_ops = {
185 .init = iptable_mangle_net_init,
186 .exit = iptable_mangle_net_exit,
187};
188
169static int __init iptable_mangle_init(void) 189static int __init iptable_mangle_init(void)
170{ 190{
171 int ret; 191 int ret;
172 192
173 /* Register table */ 193 ret = register_pernet_subsys(&iptable_mangle_net_ops);
174 ret = ipt_register_table(&packet_mangler, &initial_table.repl);
175 if (ret < 0) 194 if (ret < 0)
176 return ret; 195 return ret;
177 196
@@ -183,14 +202,14 @@ static int __init iptable_mangle_init(void)
183 return ret; 202 return ret;
184 203
185 cleanup_table: 204 cleanup_table:
186 ipt_unregister_table(&packet_mangler); 205 unregister_pernet_subsys(&iptable_mangle_net_ops);
187 return ret; 206 return ret;
188} 207}
189 208
190static void __exit iptable_mangle_fini(void) 209static void __exit iptable_mangle_fini(void)
191{ 210{
192 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 211 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
193 ipt_unregister_table(&packet_mangler); 212 unregister_pernet_subsys(&iptable_mangle_net_ops);
194} 213}
195 214
196module_init(iptable_mangle_init); 215module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 5de6e57ac55c..e41fe8ca4e1c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -7,26 +7,26 @@
7#include <linux/netfilter_ipv4/ip_tables.h> 7#include <linux/netfilter_ipv4/ip_tables.h>
8#include <net/ip.h> 8#include <net/ip.h>
9 9
10#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) 10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
11 11
12static struct 12static struct
13{ 13{
14 struct ipt_replace repl; 14 struct ipt_replace repl;
15 struct ipt_standard entries[2]; 15 struct ipt_standard entries[2];
16 struct ipt_error term; 16 struct ipt_error term;
17} initial_table __initdata = { 17} initial_table __net_initdata = {
18 .repl = { 18 .repl = {
19 .name = "raw", 19 .name = "raw",
20 .valid_hooks = RAW_VALID_HOOKS, 20 .valid_hooks = RAW_VALID_HOOKS,
21 .num_entries = 3, 21 .num_entries = 3,
22 .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), 22 .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
23 .hook_entry = { 23 .hook_entry = {
24 [NF_IP_PRE_ROUTING] = 0, 24 [NF_INET_PRE_ROUTING] = 0,
25 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) 25 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
26 }, 26 },
27 .underflow = { 27 .underflow = {
28 [NF_IP_PRE_ROUTING] = 0, 28 [NF_INET_PRE_ROUTING] = 0,
29 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) 29 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
30 }, 30 },
31 }, 31 },
32 .entries = { 32 .entries = {
@@ -52,7 +52,7 @@ ipt_hook(unsigned int hook,
52 const struct net_device *out, 52 const struct net_device *out,
53 int (*okfn)(struct sk_buff *)) 53 int (*okfn)(struct sk_buff *))
54{ 54{
55 return ipt_do_table(skb, hook, in, out, &packet_raw); 55 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
56} 56}
57 57
58static unsigned int 58static unsigned int
@@ -66,37 +66,56 @@ ipt_local_hook(unsigned int hook,
66 if (skb->len < sizeof(struct iphdr) || 66 if (skb->len < sizeof(struct iphdr) ||
67 ip_hdrlen(skb) < sizeof(struct iphdr)) { 67 ip_hdrlen(skb) < sizeof(struct iphdr)) {
68 if (net_ratelimit()) 68 if (net_ratelimit())
69 printk("iptable_raw: ignoring short SOCK_RAW" 69 printk("iptable_raw: ignoring short SOCK_RAW "
70 "packet.\n"); 70 "packet.\n");
71 return NF_ACCEPT; 71 return NF_ACCEPT;
72 } 72 }
73 return ipt_do_table(skb, hook, in, out, &packet_raw); 73 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
74} 74}
75 75
76/* 'raw' is the very first table. */ 76/* 'raw' is the very first table. */
77static struct nf_hook_ops ipt_ops[] = { 77static struct nf_hook_ops ipt_ops[] __read_mostly = {
78 { 78 {
79 .hook = ipt_hook, 79 .hook = ipt_hook,
80 .pf = PF_INET, 80 .pf = PF_INET,
81 .hooknum = NF_IP_PRE_ROUTING, 81 .hooknum = NF_INET_PRE_ROUTING,
82 .priority = NF_IP_PRI_RAW, 82 .priority = NF_IP_PRI_RAW,
83 .owner = THIS_MODULE, 83 .owner = THIS_MODULE,
84 }, 84 },
85 { 85 {
86 .hook = ipt_local_hook, 86 .hook = ipt_local_hook,
87 .pf = PF_INET, 87 .pf = PF_INET,
88 .hooknum = NF_IP_LOCAL_OUT, 88 .hooknum = NF_INET_LOCAL_OUT,
89 .priority = NF_IP_PRI_RAW, 89 .priority = NF_IP_PRI_RAW,
90 .owner = THIS_MODULE, 90 .owner = THIS_MODULE,
91 }, 91 },
92}; 92};
93 93
94static int __net_init iptable_raw_net_init(struct net *net)
95{
96 /* Register table */
97 net->ipv4.iptable_raw =
98 ipt_register_table(net, &packet_raw, &initial_table.repl);
99 if (IS_ERR(net->ipv4.iptable_raw))
100 return PTR_ERR(net->ipv4.iptable_raw);
101 return 0;
102}
103
104static void __net_exit iptable_raw_net_exit(struct net *net)
105{
106 ipt_unregister_table(net->ipv4.iptable_raw);
107}
108
109static struct pernet_operations iptable_raw_net_ops = {
110 .init = iptable_raw_net_init,
111 .exit = iptable_raw_net_exit,
112};
113
94static int __init iptable_raw_init(void) 114static int __init iptable_raw_init(void)
95{ 115{
96 int ret; 116 int ret;
97 117
98 /* Register table */ 118 ret = register_pernet_subsys(&iptable_raw_net_ops);
99 ret = ipt_register_table(&packet_raw, &initial_table.repl);
100 if (ret < 0) 119 if (ret < 0)
101 return ret; 120 return ret;
102 121
@@ -108,14 +127,14 @@ static int __init iptable_raw_init(void)
108 return ret; 127 return ret;
109 128
110 cleanup_table: 129 cleanup_table:
111 ipt_unregister_table(&packet_raw); 130 unregister_pernet_subsys(&iptable_raw_net_ops);
112 return ret; 131 return ret;
113} 132}
114 133
115static void __exit iptable_raw_fini(void) 134static void __exit iptable_raw_fini(void)
116{ 135{
117 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 136 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
118 ipt_unregister_table(&packet_raw); 137 unregister_pernet_subsys(&iptable_raw_net_ops);
119} 138}
120 139
121module_init(iptable_raw_init); 140module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 831e9b29806d..a65b845c5f15 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -27,7 +27,8 @@
27static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 27static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
28 struct nf_conntrack_tuple *tuple) 28 struct nf_conntrack_tuple *tuple)
29{ 29{
30 __be32 _addrs[2], *ap; 30 const __be32 *ap;
31 __be32 _addrs[2];
31 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 32 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
32 sizeof(u_int32_t) * 2, _addrs); 33 sizeof(u_int32_t) * 2, _addrs);
33 if (ap == NULL) 34 if (ap == NULL)
@@ -56,12 +57,6 @@ static int ipv4_print_tuple(struct seq_file *s,
56 NIPQUAD(tuple->dst.u3.ip)); 57 NIPQUAD(tuple->dst.u3.ip));
57} 58}
58 59
59static int ipv4_print_conntrack(struct seq_file *s,
60 const struct nf_conn *conntrack)
61{
62 return 0;
63}
64
65/* Returns new sk_buff, or NULL */ 60/* Returns new sk_buff, or NULL */
66static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 61static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
67{ 62{
@@ -82,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
82static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 77static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
83 unsigned int *dataoff, u_int8_t *protonum) 78 unsigned int *dataoff, u_int8_t *protonum)
84{ 79{
85 struct iphdr _iph, *iph; 80 const struct iphdr *iph;
81 struct iphdr _iph;
86 82
87 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 83 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
88 if (iph == NULL) 84 if (iph == NULL)
@@ -117,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
117{ 113{
118 struct nf_conn *ct; 114 struct nf_conn *ct;
119 enum ip_conntrack_info ctinfo; 115 enum ip_conntrack_info ctinfo;
120 struct nf_conn_help *help; 116 const struct nf_conn_help *help;
121 struct nf_conntrack_helper *helper; 117 const struct nf_conntrack_helper *helper;
122 118
123 /* This is where we call the helper: as the packet goes out. */ 119 /* This is where we call the helper: as the packet goes out. */
124 ct = nf_ct_get(skb, &ctinfo); 120 ct = nf_ct_get(skb, &ctinfo);
@@ -150,7 +146,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
150 /* Gather fragments. */ 146 /* Gather fragments. */
151 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 147 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
152 if (nf_ct_ipv4_gather_frags(skb, 148 if (nf_ct_ipv4_gather_frags(skb,
153 hooknum == NF_IP_PRE_ROUTING ? 149 hooknum == NF_INET_PRE_ROUTING ?
154 IP_DEFRAG_CONNTRACK_IN : 150 IP_DEFRAG_CONNTRACK_IN :
155 IP_DEFRAG_CONNTRACK_OUT)) 151 IP_DEFRAG_CONNTRACK_OUT))
156 return NF_STOLEN; 152 return NF_STOLEN;
@@ -185,61 +181,61 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
185 181
186/* Connection tracking may drop packets, but never alters them, so 182/* Connection tracking may drop packets, but never alters them, so
187 make it the first hook. */ 183 make it the first hook. */
188static struct nf_hook_ops ipv4_conntrack_ops[] = { 184static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
189 { 185 {
190 .hook = ipv4_conntrack_defrag, 186 .hook = ipv4_conntrack_defrag,
191 .owner = THIS_MODULE, 187 .owner = THIS_MODULE,
192 .pf = PF_INET, 188 .pf = PF_INET,
193 .hooknum = NF_IP_PRE_ROUTING, 189 .hooknum = NF_INET_PRE_ROUTING,
194 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 190 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
195 }, 191 },
196 { 192 {
197 .hook = ipv4_conntrack_in, 193 .hook = ipv4_conntrack_in,
198 .owner = THIS_MODULE, 194 .owner = THIS_MODULE,
199 .pf = PF_INET, 195 .pf = PF_INET,
200 .hooknum = NF_IP_PRE_ROUTING, 196 .hooknum = NF_INET_PRE_ROUTING,
201 .priority = NF_IP_PRI_CONNTRACK, 197 .priority = NF_IP_PRI_CONNTRACK,
202 }, 198 },
203 { 199 {
204 .hook = ipv4_conntrack_defrag, 200 .hook = ipv4_conntrack_defrag,
205 .owner = THIS_MODULE, 201 .owner = THIS_MODULE,
206 .pf = PF_INET, 202 .pf = PF_INET,
207 .hooknum = NF_IP_LOCAL_OUT, 203 .hooknum = NF_INET_LOCAL_OUT,
208 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 204 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
209 }, 205 },
210 { 206 {
211 .hook = ipv4_conntrack_local, 207 .hook = ipv4_conntrack_local,
212 .owner = THIS_MODULE, 208 .owner = THIS_MODULE,
213 .pf = PF_INET, 209 .pf = PF_INET,
214 .hooknum = NF_IP_LOCAL_OUT, 210 .hooknum = NF_INET_LOCAL_OUT,
215 .priority = NF_IP_PRI_CONNTRACK, 211 .priority = NF_IP_PRI_CONNTRACK,
216 }, 212 },
217 { 213 {
218 .hook = ipv4_conntrack_help, 214 .hook = ipv4_conntrack_help,
219 .owner = THIS_MODULE, 215 .owner = THIS_MODULE,
220 .pf = PF_INET, 216 .pf = PF_INET,
221 .hooknum = NF_IP_POST_ROUTING, 217 .hooknum = NF_INET_POST_ROUTING,
222 .priority = NF_IP_PRI_CONNTRACK_HELPER, 218 .priority = NF_IP_PRI_CONNTRACK_HELPER,
223 }, 219 },
224 { 220 {
225 .hook = ipv4_conntrack_help, 221 .hook = ipv4_conntrack_help,
226 .owner = THIS_MODULE, 222 .owner = THIS_MODULE,
227 .pf = PF_INET, 223 .pf = PF_INET,
228 .hooknum = NF_IP_LOCAL_IN, 224 .hooknum = NF_INET_LOCAL_IN,
229 .priority = NF_IP_PRI_CONNTRACK_HELPER, 225 .priority = NF_IP_PRI_CONNTRACK_HELPER,
230 }, 226 },
231 { 227 {
232 .hook = ipv4_confirm, 228 .hook = ipv4_confirm,
233 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
234 .pf = PF_INET, 230 .pf = PF_INET,
235 .hooknum = NF_IP_POST_ROUTING, 231 .hooknum = NF_INET_POST_ROUTING,
236 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 232 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
237 }, 233 },
238 { 234 {
239 .hook = ipv4_confirm, 235 .hook = ipv4_confirm,
240 .owner = THIS_MODULE, 236 .owner = THIS_MODULE,
241 .pf = PF_INET, 237 .pf = PF_INET,
242 .hooknum = NF_IP_LOCAL_IN, 238 .hooknum = NF_INET_LOCAL_IN,
243 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 239 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
244 }, 240 },
245}; 241};
@@ -305,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = {
305static int 301static int
306getorigdst(struct sock *sk, int optval, void __user *user, int *len) 302getorigdst(struct sock *sk, int optval, void __user *user, int *len)
307{ 303{
308 struct inet_sock *inet = inet_sk(sk); 304 const struct inet_sock *inet = inet_sk(sk);
309 struct nf_conntrack_tuple_hash *h; 305 const struct nf_conntrack_tuple_hash *h;
310 struct nf_conntrack_tuple tuple; 306 struct nf_conntrack_tuple tuple;
311 307
312 NF_CT_TUPLE_U_BLANK(&tuple); 308 NF_CT_TUPLE_U_BLANK(&tuple);
@@ -363,10 +359,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
363static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 359static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
364 const struct nf_conntrack_tuple *tuple) 360 const struct nf_conntrack_tuple *tuple)
365{ 361{
366 NLA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), 362 NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
367 &tuple->src.u3.ip); 363 NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
368 NLA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
369 &tuple->dst.u3.ip);
370 return 0; 364 return 0;
371 365
372nla_put_failure: 366nla_put_failure:
@@ -384,8 +378,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
384 if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 378 if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
385 return -EINVAL; 379 return -EINVAL;
386 380
387 t->src.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_SRC]); 381 t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
388 t->dst.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_DST]); 382 t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
389 383
390 return 0; 384 return 0;
391} 385}
@@ -405,7 +399,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
405 .pkt_to_tuple = ipv4_pkt_to_tuple, 399 .pkt_to_tuple = ipv4_pkt_to_tuple,
406 .invert_tuple = ipv4_invert_tuple, 400 .invert_tuple = ipv4_invert_tuple,
407 .print_tuple = ipv4_print_tuple, 401 .print_tuple = ipv4_print_tuple,
408 .print_conntrack = ipv4_print_conntrack,
409 .get_l4proto = ipv4_get_l4proto, 402 .get_l4proto = ipv4_get_l4proto,
410#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 403#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
411 .tuple_to_nlattr = ipv4_tuple_to_nlattr, 404 .tuple_to_nlattr = ipv4_tuple_to_nlattr,
@@ -419,6 +412,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
419 .me = THIS_MODULE, 412 .me = THIS_MODULE,
420}; 413};
421 414
415module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
416 &nf_conntrack_htable_size, 0600);
417
422MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 418MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
423MODULE_ALIAS("ip_conntrack"); 419MODULE_ALIAS("ip_conntrack");
424MODULE_LICENSE("GPL"); 420MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 741f3dfaa5a1..089252e82c01 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -39,12 +39,14 @@ struct ct_iter_state {
39static struct hlist_node *ct_get_first(struct seq_file *seq) 39static struct hlist_node *ct_get_first(struct seq_file *seq)
40{ 40{
41 struct ct_iter_state *st = seq->private; 41 struct ct_iter_state *st = seq->private;
42 struct hlist_node *n;
42 43
43 for (st->bucket = 0; 44 for (st->bucket = 0;
44 st->bucket < nf_conntrack_htable_size; 45 st->bucket < nf_conntrack_htable_size;
45 st->bucket++) { 46 st->bucket++) {
46 if (!hlist_empty(&nf_conntrack_hash[st->bucket])) 47 n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
47 return nf_conntrack_hash[st->bucket].first; 48 if (n)
49 return n;
48 } 50 }
49 return NULL; 51 return NULL;
50} 52}
@@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
54{ 56{
55 struct ct_iter_state *st = seq->private; 57 struct ct_iter_state *st = seq->private;
56 58
57 head = head->next; 59 head = rcu_dereference(head->next);
58 while (head == NULL) { 60 while (head == NULL) {
59 if (++st->bucket >= nf_conntrack_htable_size) 61 if (++st->bucket >= nf_conntrack_htable_size)
60 return NULL; 62 return NULL;
61 head = nf_conntrack_hash[st->bucket].first; 63 head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
62 } 64 }
63 return head; 65 return head;
64} 66}
@@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
74} 76}
75 77
76static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 78static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
79 __acquires(RCU)
77{ 80{
78 read_lock_bh(&nf_conntrack_lock); 81 rcu_read_lock();
79 return ct_get_idx(seq, *pos); 82 return ct_get_idx(seq, *pos);
80} 83}
81 84
@@ -86,16 +89,17 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
86} 89}
87 90
88static void ct_seq_stop(struct seq_file *s, void *v) 91static void ct_seq_stop(struct seq_file *s, void *v)
92 __releases(RCU)
89{ 93{
90 read_unlock_bh(&nf_conntrack_lock); 94 rcu_read_unlock();
91} 95}
92 96
93static int ct_seq_show(struct seq_file *s, void *v) 97static int ct_seq_show(struct seq_file *s, void *v)
94{ 98{
95 const struct nf_conntrack_tuple_hash *hash = v; 99 const struct nf_conntrack_tuple_hash *hash = v;
96 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 100 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
97 struct nf_conntrack_l3proto *l3proto; 101 const struct nf_conntrack_l3proto *l3proto;
98 struct nf_conntrack_l4proto *l4proto; 102 const struct nf_conntrack_l4proto *l4proto;
99 103
100 NF_CT_ASSERT(ct); 104 NF_CT_ASSERT(ct);
101 105
@@ -121,10 +125,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
121 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 125 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
122 return -ENOSPC; 126 return -ENOSPC;
123 127
124 if (l3proto->print_conntrack(s, ct)) 128 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
125 return -ENOSPC;
126
127 if (l4proto->print_conntrack(s, ct))
128 return -ENOSPC; 129 return -ENOSPC;
129 130
130 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 131 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
@@ -194,10 +195,12 @@ struct ct_expect_iter_state {
194static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 195static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
195{ 196{
196 struct ct_expect_iter_state *st = seq->private; 197 struct ct_expect_iter_state *st = seq->private;
198 struct hlist_node *n;
197 199
198 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 200 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
199 if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) 201 n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
200 return nf_ct_expect_hash[st->bucket].first; 202 if (n)
203 return n;
201 } 204 }
202 return NULL; 205 return NULL;
203} 206}
@@ -207,11 +210,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
207{ 210{
208 struct ct_expect_iter_state *st = seq->private; 211 struct ct_expect_iter_state *st = seq->private;
209 212
210 head = head->next; 213 head = rcu_dereference(head->next);
211 while (head == NULL) { 214 while (head == NULL) {
212 if (++st->bucket >= nf_ct_expect_hsize) 215 if (++st->bucket >= nf_ct_expect_hsize)
213 return NULL; 216 return NULL;
214 head = nf_ct_expect_hash[st->bucket].first; 217 head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
215 } 218 }
216 return head; 219 return head;
217} 220}
@@ -227,8 +230,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
227} 230}
228 231
229static void *exp_seq_start(struct seq_file *seq, loff_t *pos) 232static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
233 __acquires(RCU)
230{ 234{
231 read_lock_bh(&nf_conntrack_lock); 235 rcu_read_lock();
232 return ct_expect_get_idx(seq, *pos); 236 return ct_expect_get_idx(seq, *pos);
233} 237}
234 238
@@ -239,14 +243,15 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
239} 243}
240 244
241static void exp_seq_stop(struct seq_file *seq, void *v) 245static void exp_seq_stop(struct seq_file *seq, void *v)
246 __releases(RCU)
242{ 247{
243 read_unlock_bh(&nf_conntrack_lock); 248 rcu_read_unlock();
244} 249}
245 250
246static int exp_seq_show(struct seq_file *s, void *v) 251static int exp_seq_show(struct seq_file *s, void *v)
247{ 252{
248 struct nf_conntrack_expect *exp; 253 struct nf_conntrack_expect *exp;
249 struct hlist_node *n = v; 254 const struct hlist_node *n = v;
250 255
251 exp = hlist_entry(n, struct nf_conntrack_expect, hnode); 256 exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
252 257
@@ -327,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
327static int ct_cpu_seq_show(struct seq_file *seq, void *v) 332static int ct_cpu_seq_show(struct seq_file *seq, void *v)
328{ 333{
329 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); 334 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
330 struct ip_conntrack_stat *st = v; 335 const struct ip_conntrack_stat *st = v;
331 336
332 if (v == SEQ_START_TOKEN) { 337 if (v == SEQ_START_TOKEN) {
333 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); 338 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index adcbaf6d4299..6873fddb3529 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -18,6 +18,7 @@
18#include <net/netfilter/nf_conntrack_tuple.h> 18#include <net/netfilter/nf_conntrack_tuple.h>
19#include <net/netfilter/nf_conntrack_l4proto.h> 19#include <net/netfilter/nf_conntrack_l4proto.h>
20#include <net/netfilter/nf_conntrack_core.h> 20#include <net/netfilter/nf_conntrack_core.h>
21#include <net/netfilter/nf_log.h>
21 22
22static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; 23static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
23 24
@@ -25,7 +26,8 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
25 unsigned int dataoff, 26 unsigned int dataoff,
26 struct nf_conntrack_tuple *tuple) 27 struct nf_conntrack_tuple *tuple)
27{ 28{
28 struct icmphdr _hdr, *hp; 29 const struct icmphdr *hp;
30 struct icmphdr _hdr;
29 31
30 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 32 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
31 if (hp == NULL) 33 if (hp == NULL)
@@ -73,13 +75,6 @@ static int icmp_print_tuple(struct seq_file *s,
73 ntohs(tuple->src.u.icmp.id)); 75 ntohs(tuple->src.u.icmp.id));
74} 76}
75 77
76/* Print out the private part of the conntrack. */
77static int icmp_print_conntrack(struct seq_file *s,
78 const struct nf_conn *conntrack)
79{
80 return 0;
81}
82
83/* Returns verdict for packet, or -1 for invalid. */ 78/* Returns verdict for packet, or -1 for invalid. */
84static int icmp_packet(struct nf_conn *ct, 79static int icmp_packet(struct nf_conn *ct,
85 const struct sk_buff *skb, 80 const struct sk_buff *skb,
@@ -106,7 +101,7 @@ static int icmp_packet(struct nf_conn *ct,
106} 101}
107 102
108/* Called when a new connection for this protocol found. */ 103/* Called when a new connection for this protocol found. */
109static int icmp_new(struct nf_conn *conntrack, 104static int icmp_new(struct nf_conn *ct,
110 const struct sk_buff *skb, unsigned int dataoff) 105 const struct sk_buff *skb, unsigned int dataoff)
111{ 106{
112 static const u_int8_t valid_new[] = { 107 static const u_int8_t valid_new[] = {
@@ -116,19 +111,18 @@ static int icmp_new(struct nf_conn *conntrack,
116 [ICMP_ADDRESS] = 1 111 [ICMP_ADDRESS] = 1
117 }; 112 };
118 113
119 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 114 if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
120 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 115 || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
121 /* Can't create a new ICMP `conn' with this. */ 116 /* Can't create a new ICMP `conn' with this. */
122 pr_debug("icmp: can't create new conn with type %u\n", 117 pr_debug("icmp: can't create new conn with type %u\n",
123 conntrack->tuplehash[0].tuple.dst.u.icmp.type); 118 ct->tuplehash[0].tuple.dst.u.icmp.type);
124 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); 119 NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
125 return 0; 120 return 0;
126 } 121 }
127 atomic_set(&conntrack->proto.icmp.count, 0); 122 atomic_set(&ct->proto.icmp.count, 0);
128 return 1; 123 return 1;
129} 124}
130 125
131extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
132/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 126/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
133static int 127static int
134icmp_error_message(struct sk_buff *skb, 128icmp_error_message(struct sk_buff *skb,
@@ -136,8 +130,8 @@ icmp_error_message(struct sk_buff *skb,
136 unsigned int hooknum) 130 unsigned int hooknum)
137{ 131{
138 struct nf_conntrack_tuple innertuple, origtuple; 132 struct nf_conntrack_tuple innertuple, origtuple;
139 struct nf_conntrack_l4proto *innerproto; 133 const struct nf_conntrack_l4proto *innerproto;
140 struct nf_conntrack_tuple_hash *h; 134 const struct nf_conntrack_tuple_hash *h;
141 135
142 NF_CT_ASSERT(skb->nfct == NULL); 136 NF_CT_ASSERT(skb->nfct == NULL);
143 137
@@ -183,7 +177,8 @@ static int
183icmp_error(struct sk_buff *skb, unsigned int dataoff, 177icmp_error(struct sk_buff *skb, unsigned int dataoff,
184 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) 178 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
185{ 179{
186 struct icmphdr _ih, *icmph; 180 const struct icmphdr *icmph;
181 struct icmphdr _ih;
187 182
188 /* Not enough header? */ 183 /* Not enough header? */
189 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 184 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
@@ -195,7 +190,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
195 } 190 }
196 191
197 /* See ip_conntrack_proto_tcp.c */ 192 /* See ip_conntrack_proto_tcp.c */
198 if (nf_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 193 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
199 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 194 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
200 if (LOG_INVALID(IPPROTO_ICMP)) 195 if (LOG_INVALID(IPPROTO_ICMP))
201 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 196 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
@@ -235,12 +230,9 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
235static int icmp_tuple_to_nlattr(struct sk_buff *skb, 230static int icmp_tuple_to_nlattr(struct sk_buff *skb,
236 const struct nf_conntrack_tuple *t) 231 const struct nf_conntrack_tuple *t)
237{ 232{
238 NLA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), 233 NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
239 &t->src.u.icmp.id); 234 NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
240 NLA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), 235 NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
241 &t->dst.u.icmp.type);
242 NLA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
243 &t->dst.u.icmp.code);
244 236
245 return 0; 237 return 0;
246 238
@@ -262,12 +254,9 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
262 || !tb[CTA_PROTO_ICMP_ID]) 254 || !tb[CTA_PROTO_ICMP_ID])
263 return -EINVAL; 255 return -EINVAL;
264 256
265 tuple->dst.u.icmp.type = 257 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
266 *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_TYPE]); 258 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
267 tuple->dst.u.icmp.code = 259 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
268 *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_CODE]);
269 tuple->src.u.icmp.id =
270 *(__be16 *)nla_data(tb[CTA_PROTO_ICMP_ID]);
271 260
272 if (tuple->dst.u.icmp.type >= sizeof(invmap) 261 if (tuple->dst.u.icmp.type >= sizeof(invmap)
273 || !invmap[tuple->dst.u.icmp.type]) 262 || !invmap[tuple->dst.u.icmp.type])
@@ -315,7 +304,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
315 .pkt_to_tuple = icmp_pkt_to_tuple, 304 .pkt_to_tuple = icmp_pkt_to_tuple,
316 .invert_tuple = icmp_invert_tuple, 305 .invert_tuple = icmp_invert_tuple,
317 .print_tuple = icmp_print_tuple, 306 .print_tuple = icmp_print_tuple,
318 .print_conntrack = icmp_print_conntrack,
319 .packet = icmp_packet, 307 .packet = icmp_packet,
320 .new = icmp_new, 308 .new = icmp_new,
321 .error = icmp_error, 309 .error = icmp_error,
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 35a5aa69cd92..c31b87668250 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -69,7 +69,7 @@ static void __exit nf_nat_amanda_fini(void)
69 69
70static int __init nf_nat_amanda_init(void) 70static int __init nf_nat_amanda_init(void)
71{ 71{
72 BUG_ON(rcu_dereference(nf_nat_amanda_hook)); 72 BUG_ON(nf_nat_amanda_hook != NULL);
73 rcu_assign_pointer(nf_nat_amanda_hook, help); 73 rcu_assign_pointer(nf_nat_amanda_hook, help);
74 return 0; 74 return 0;
75} 75}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 56e93f692e82..dd07362d2b8f 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -31,29 +31,30 @@
31#include <net/netfilter/nf_conntrack_l3proto.h> 31#include <net/netfilter/nf_conntrack_l3proto.h>
32#include <net/netfilter/nf_conntrack_l4proto.h> 32#include <net/netfilter/nf_conntrack_l4proto.h>
33 33
34static DEFINE_RWLOCK(nf_nat_lock); 34static DEFINE_SPINLOCK(nf_nat_lock);
35 35
36static struct nf_conntrack_l3proto *l3proto = NULL; 36static struct nf_conntrack_l3proto *l3proto __read_mostly;
37 37
38/* Calculated at init based on memory size */ 38/* Calculated at init based on memory size */
39static unsigned int nf_nat_htable_size; 39static unsigned int nf_nat_htable_size __read_mostly;
40static int nf_nat_vmalloced; 40static int nf_nat_vmalloced;
41 41
42static struct hlist_head *bysource; 42static struct hlist_head *bysource __read_mostly;
43 43
44#define MAX_IP_NAT_PROTO 256 44#define MAX_IP_NAT_PROTO 256
45static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]; 45static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
46 __read_mostly;
46 47
47static inline struct nf_nat_protocol * 48static inline const struct nf_nat_protocol *
48__nf_nat_proto_find(u_int8_t protonum) 49__nf_nat_proto_find(u_int8_t protonum)
49{ 50{
50 return rcu_dereference(nf_nat_protos[protonum]); 51 return rcu_dereference(nf_nat_protos[protonum]);
51} 52}
52 53
53struct nf_nat_protocol * 54const struct nf_nat_protocol *
54nf_nat_proto_find_get(u_int8_t protonum) 55nf_nat_proto_find_get(u_int8_t protonum)
55{ 56{
56 struct nf_nat_protocol *p; 57 const struct nf_nat_protocol *p;
57 58
58 rcu_read_lock(); 59 rcu_read_lock();
59 p = __nf_nat_proto_find(protonum); 60 p = __nf_nat_proto_find(protonum);
@@ -66,7 +67,7 @@ nf_nat_proto_find_get(u_int8_t protonum)
66EXPORT_SYMBOL_GPL(nf_nat_proto_find_get); 67EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
67 68
68void 69void
69nf_nat_proto_put(struct nf_nat_protocol *p) 70nf_nat_proto_put(const struct nf_nat_protocol *p)
70{ 71{
71 module_put(p->me); 72 module_put(p->me);
72} 73}
@@ -76,10 +77,13 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
76static inline unsigned int 77static inline unsigned int
77hash_by_src(const struct nf_conntrack_tuple *tuple) 78hash_by_src(const struct nf_conntrack_tuple *tuple)
78{ 79{
80 unsigned int hash;
81
79 /* Original src, to ensure we map it consistently if poss. */ 82 /* Original src, to ensure we map it consistently if poss. */
80 return jhash_3words((__force u32)tuple->src.u3.ip, 83 hash = jhash_3words((__force u32)tuple->src.u3.ip,
81 (__force u32)tuple->src.u.all, 84 (__force u32)tuple->src.u.all,
82 tuple->dst.protonum, 0) % nf_nat_htable_size; 85 tuple->dst.protonum, 0);
86 return ((u64)hash * nf_nat_htable_size) >> 32;
83} 87}
84 88
85/* Is this tuple already taken? (not by us) */ 89/* Is this tuple already taken? (not by us) */
@@ -105,7 +109,7 @@ static int
105in_range(const struct nf_conntrack_tuple *tuple, 109in_range(const struct nf_conntrack_tuple *tuple,
106 const struct nf_nat_range *range) 110 const struct nf_nat_range *range)
107{ 111{
108 struct nf_nat_protocol *proto; 112 const struct nf_nat_protocol *proto;
109 int ret = 0; 113 int ret = 0;
110 114
111 /* If we are supposed to map IPs, then we must be in the 115 /* If we are supposed to map IPs, then we must be in the
@@ -150,8 +154,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
150 struct nf_conn *ct; 154 struct nf_conn *ct;
151 struct hlist_node *n; 155 struct hlist_node *n;
152 156
153 read_lock_bh(&nf_nat_lock); 157 rcu_read_lock();
154 hlist_for_each_entry(nat, n, &bysource[h], bysource) { 158 hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
155 ct = nat->ct; 159 ct = nat->ct;
156 if (same_src(ct, tuple)) { 160 if (same_src(ct, tuple)) {
157 /* Copy source part from reply tuple. */ 161 /* Copy source part from reply tuple. */
@@ -160,12 +164,12 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
160 result->dst = tuple->dst; 164 result->dst = tuple->dst;
161 165
162 if (in_range(result, range)) { 166 if (in_range(result, range)) {
163 read_unlock_bh(&nf_nat_lock); 167 rcu_read_unlock();
164 return 1; 168 return 1;
165 } 169 }
166 } 170 }
167 } 171 }
168 read_unlock_bh(&nf_nat_lock); 172 rcu_read_unlock();
169 return 0; 173 return 0;
170} 174}
171 175
@@ -210,12 +214,13 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
210 maxip = ntohl(range->max_ip); 214 maxip = ntohl(range->max_ip);
211 j = jhash_2words((__force u32)tuple->src.u3.ip, 215 j = jhash_2words((__force u32)tuple->src.u3.ip,
212 (__force u32)tuple->dst.u3.ip, 0); 216 (__force u32)tuple->dst.u3.ip, 0);
213 *var_ipp = htonl(minip + j % (maxip - minip + 1)); 217 j = ((u64)j * (maxip - minip + 1)) >> 32;
218 *var_ipp = htonl(minip + j);
214} 219}
215 220
216/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING, 221/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
217 * we change the source to map into the range. For NF_IP_PRE_ROUTING 222 * we change the source to map into the range. For NF_INET_PRE_ROUTING
218 * and NF_IP_LOCAL_OUT, we change the destination to map into the 223 * and NF_INET_LOCAL_OUT, we change the destination to map into the
219 * range. It might not be possible to get a unique tuple, but we try. 224 * range. It might not be possible to get a unique tuple, but we try.
220 * At worst (or if we race), we will end up with a final duplicate in 225 * At worst (or if we race), we will end up with a final duplicate in
221 * __ip_conntrack_confirm and drop the packet. */ 226 * __ip_conntrack_confirm and drop the packet. */
@@ -226,7 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
226 struct nf_conn *ct, 231 struct nf_conn *ct,
227 enum nf_nat_manip_type maniptype) 232 enum nf_nat_manip_type maniptype)
228{ 233{
229 struct nf_nat_protocol *proto; 234 const struct nf_nat_protocol *proto;
230 235
231 /* 1) If this srcip/proto/src-proto-part is currently mapped, 236 /* 1) If this srcip/proto/src-proto-part is currently mapped,
232 and that same mapping gives a unique tuple within the given 237 and that same mapping gives a unique tuple within the given
@@ -276,12 +281,11 @@ out:
276unsigned int 281unsigned int
277nf_nat_setup_info(struct nf_conn *ct, 282nf_nat_setup_info(struct nf_conn *ct,
278 const struct nf_nat_range *range, 283 const struct nf_nat_range *range,
279 unsigned int hooknum) 284 enum nf_nat_manip_type maniptype)
280{ 285{
281 struct nf_conntrack_tuple curr_tuple, new_tuple; 286 struct nf_conntrack_tuple curr_tuple, new_tuple;
282 struct nf_conn_nat *nat; 287 struct nf_conn_nat *nat;
283 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); 288 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
284 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
285 289
286 /* nat helper or nfctnetlink also setup binding */ 290 /* nat helper or nfctnetlink also setup binding */
287 nat = nfct_nat(ct); 291 nat = nfct_nat(ct);
@@ -293,10 +297,8 @@ nf_nat_setup_info(struct nf_conn *ct,
293 } 297 }
294 } 298 }
295 299
296 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || 300 NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
297 hooknum == NF_IP_POST_ROUTING || 301 maniptype == IP_NAT_MANIP_DST);
298 hooknum == NF_IP_LOCAL_IN ||
299 hooknum == NF_IP_LOCAL_OUT);
300 BUG_ON(nf_nat_initialized(ct, maniptype)); 302 BUG_ON(nf_nat_initialized(ct, maniptype));
301 303
302 /* What we've got will look like inverse of reply. Normally 304 /* What we've got will look like inverse of reply. Normally
@@ -328,12 +330,12 @@ nf_nat_setup_info(struct nf_conn *ct,
328 unsigned int srchash; 330 unsigned int srchash;
329 331
330 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 332 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
331 write_lock_bh(&nf_nat_lock); 333 spin_lock_bh(&nf_nat_lock);
332 /* nf_conntrack_alter_reply might re-allocate exntension aera */ 334 /* nf_conntrack_alter_reply might re-allocate exntension aera */
333 nat = nfct_nat(ct); 335 nat = nfct_nat(ct);
334 nat->ct = ct; 336 nat->ct = ct;
335 hlist_add_head(&nat->bysource, &bysource[srchash]); 337 hlist_add_head_rcu(&nat->bysource, &bysource[srchash]);
336 write_unlock_bh(&nf_nat_lock); 338 spin_unlock_bh(&nf_nat_lock);
337 } 339 }
338 340
339 /* It's done. */ 341 /* It's done. */
@@ -355,7 +357,7 @@ manip_pkt(u_int16_t proto,
355 enum nf_nat_manip_type maniptype) 357 enum nf_nat_manip_type maniptype)
356{ 358{
357 struct iphdr *iph; 359 struct iphdr *iph;
358 struct nf_nat_protocol *p; 360 const struct nf_nat_protocol *p;
359 361
360 if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) 362 if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
361 return 0; 363 return 0;
@@ -372,10 +374,10 @@ manip_pkt(u_int16_t proto,
372 iph = (void *)skb->data + iphdroff; 374 iph = (void *)skb->data + iphdroff;
373 375
374 if (maniptype == IP_NAT_MANIP_SRC) { 376 if (maniptype == IP_NAT_MANIP_SRC) {
375 nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 377 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
376 iph->saddr = target->src.u3.ip; 378 iph->saddr = target->src.u3.ip;
377 } else { 379 } else {
378 nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 380 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
379 iph->daddr = target->dst.u3.ip; 381 iph->daddr = target->dst.u3.ip;
380 } 382 }
381 return 1; 383 return 1;
@@ -515,29 +517,29 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
515EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 517EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
516 518
517/* Protocol registration. */ 519/* Protocol registration. */
518int nf_nat_protocol_register(struct nf_nat_protocol *proto) 520int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
519{ 521{
520 int ret = 0; 522 int ret = 0;
521 523
522 write_lock_bh(&nf_nat_lock); 524 spin_lock_bh(&nf_nat_lock);
523 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 525 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
524 ret = -EBUSY; 526 ret = -EBUSY;
525 goto out; 527 goto out;
526 } 528 }
527 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); 529 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
528 out: 530 out:
529 write_unlock_bh(&nf_nat_lock); 531 spin_unlock_bh(&nf_nat_lock);
530 return ret; 532 return ret;
531} 533}
532EXPORT_SYMBOL(nf_nat_protocol_register); 534EXPORT_SYMBOL(nf_nat_protocol_register);
533 535
534/* Noone stores the protocol anywhere; simply delete it. */ 536/* Noone stores the protocol anywhere; simply delete it. */
535void nf_nat_protocol_unregister(struct nf_nat_protocol *proto) 537void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
536{ 538{
537 write_lock_bh(&nf_nat_lock); 539 spin_lock_bh(&nf_nat_lock);
538 rcu_assign_pointer(nf_nat_protos[proto->protonum], 540 rcu_assign_pointer(nf_nat_protos[proto->protonum],
539 &nf_nat_unknown_protocol); 541 &nf_nat_unknown_protocol);
540 write_unlock_bh(&nf_nat_lock); 542 spin_unlock_bh(&nf_nat_lock);
541 synchronize_rcu(); 543 synchronize_rcu();
542} 544}
543EXPORT_SYMBOL(nf_nat_protocol_unregister); 545EXPORT_SYMBOL(nf_nat_protocol_unregister);
@@ -547,10 +549,8 @@ int
547nf_nat_port_range_to_nlattr(struct sk_buff *skb, 549nf_nat_port_range_to_nlattr(struct sk_buff *skb,
548 const struct nf_nat_range *range) 550 const struct nf_nat_range *range)
549{ 551{
550 NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), 552 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port);
551 &range->min.tcp.port); 553 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port);
552 NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
553 &range->max.tcp.port);
554 554
555 return 0; 555 return 0;
556 556
@@ -568,8 +568,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
568 568
569 if (tb[CTA_PROTONAT_PORT_MIN]) { 569 if (tb[CTA_PROTONAT_PORT_MIN]) {
570 ret = 1; 570 ret = 1;
571 range->min.tcp.port = 571 range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
572 *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]);
573 } 572 }
574 573
575 if (!tb[CTA_PROTONAT_PORT_MAX]) { 574 if (!tb[CTA_PROTONAT_PORT_MAX]) {
@@ -577,8 +576,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
577 range->max.tcp.port = range->min.tcp.port; 576 range->max.tcp.port = range->min.tcp.port;
578 } else { 577 } else {
579 ret = 1; 578 ret = 1;
580 range->max.tcp.port = 579 range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
581 *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MAX]);
582 } 580 }
583 581
584 return ret; 582 return ret;
@@ -596,10 +594,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
596 594
597 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); 595 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
598 596
599 write_lock_bh(&nf_nat_lock); 597 spin_lock_bh(&nf_nat_lock);
600 hlist_del(&nat->bysource); 598 hlist_del_rcu(&nat->bysource);
601 nat->ct = NULL; 599 nat->ct = NULL;
602 write_unlock_bh(&nf_nat_lock); 600 spin_unlock_bh(&nf_nat_lock);
603} 601}
604 602
605static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) 603static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
@@ -607,17 +605,14 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
607 struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); 605 struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
608 struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old; 606 struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old;
609 struct nf_conn *ct = old_nat->ct; 607 struct nf_conn *ct = old_nat->ct;
610 unsigned int srchash;
611 608
612 if (!(ct->status & IPS_NAT_DONE_MASK)) 609 if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
613 return; 610 return;
614 611
615 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 612 spin_lock_bh(&nf_nat_lock);
616
617 write_lock_bh(&nf_nat_lock);
618 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); 613 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
619 new_nat->ct = ct; 614 new_nat->ct = ct;
620 write_unlock_bh(&nf_nat_lock); 615 spin_unlock_bh(&nf_nat_lock);
621} 616}
622 617
623static struct nf_ct_ext_type nat_extend __read_mostly = { 618static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -651,17 +646,13 @@ static int __init nf_nat_init(void)
651 } 646 }
652 647
653 /* Sew in builtin protocols. */ 648 /* Sew in builtin protocols. */
654 write_lock_bh(&nf_nat_lock); 649 spin_lock_bh(&nf_nat_lock);
655 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 650 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
656 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); 651 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
657 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); 652 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
658 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); 653 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
659 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); 654 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
660 write_unlock_bh(&nf_nat_lock); 655 spin_unlock_bh(&nf_nat_lock);
661
662 for (i = 0; i < nf_nat_htable_size; i++) {
663 INIT_HLIST_HEAD(&bysource[i]);
664 }
665 656
666 /* Initialize fake conntrack so that NAT will skip it */ 657 /* Initialize fake conntrack so that NAT will skip it */
667 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 658 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
@@ -681,7 +672,7 @@ static int clean_nat(struct nf_conn *i, void *data)
681 672
682 if (!nat) 673 if (!nat)
683 return 0; 674 return 0;
684 memset(nat, 0, sizeof(nat)); 675 memset(nat, 0, sizeof(*nat));
685 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); 676 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
686 return 0; 677 return 0;
687} 678}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index e1a16d3ea4cb..a1d5d58a58bf 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -147,7 +147,7 @@ static void __exit nf_nat_ftp_fini(void)
147 147
148static int __init nf_nat_ftp_init(void) 148static int __init nf_nat_ftp_init(void)
149{ 149{
150 BUG_ON(rcu_dereference(nf_nat_ftp_hook)); 150 BUG_ON(nf_nat_ftp_hook != NULL);
151 rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); 151 rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
152 return 0; 152 return 0;
153} 153}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index a868c8c41328..ee47bf28c825 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -32,7 +32,8 @@ static int set_addr(struct sk_buff *skb,
32 __be32 ip; 32 __be32 ip;
33 __be16 port; 33 __be16 port;
34 } __attribute__ ((__packed__)) buf; 34 } __attribute__ ((__packed__)) buf;
35 struct tcphdr _tcph, *th; 35 const struct tcphdr *th;
36 struct tcphdr _tcph;
36 37
37 buf.ip = ip; 38 buf.ip = ip;
38 buf.port = port; 39 buf.port = port;
@@ -76,7 +77,7 @@ static int set_addr(struct sk_buff *skb,
76static int set_h225_addr(struct sk_buff *skb, 77static int set_h225_addr(struct sk_buff *skb,
77 unsigned char **data, int dataoff, 78 unsigned char **data, int dataoff,
78 TransportAddress *taddr, 79 TransportAddress *taddr,
79 union nf_conntrack_address *addr, __be16 port) 80 union nf_inet_addr *addr, __be16 port)
80{ 81{
81 return set_addr(skb, data, dataoff, taddr->ipAddress.ip, 82 return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
82 addr->ip, port); 83 addr->ip, port);
@@ -86,7 +87,7 @@ static int set_h225_addr(struct sk_buff *skb,
86static int set_h245_addr(struct sk_buff *skb, 87static int set_h245_addr(struct sk_buff *skb,
87 unsigned char **data, int dataoff, 88 unsigned char **data, int dataoff,
88 H245_TransportAddress *taddr, 89 H245_TransportAddress *taddr,
89 union nf_conntrack_address *addr, __be16 port) 90 union nf_inet_addr *addr, __be16 port)
90{ 91{
91 return set_addr(skb, data, dataoff, 92 return set_addr(skb, data, dataoff,
92 taddr->unicastAddress.iPAddress.network, 93 taddr->unicastAddress.iPAddress.network,
@@ -99,11 +100,11 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
99 unsigned char **data, 100 unsigned char **data,
100 TransportAddress *taddr, int count) 101 TransportAddress *taddr, int count)
101{ 102{
102 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 103 const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
103 int dir = CTINFO2DIR(ctinfo); 104 int dir = CTINFO2DIR(ctinfo);
104 int i; 105 int i;
105 __be16 port; 106 __be16 port;
106 union nf_conntrack_address addr; 107 union nf_inet_addr addr;
107 108
108 for (i = 0; i < count; i++) { 109 for (i = 0; i < count; i++) {
109 if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) { 110 if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
@@ -155,7 +156,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
155 int dir = CTINFO2DIR(ctinfo); 156 int dir = CTINFO2DIR(ctinfo);
156 int i; 157 int i;
157 __be16 port; 158 __be16 port;
158 union nf_conntrack_address addr; 159 union nf_inet_addr addr;
159 160
160 for (i = 0; i < count; i++) { 161 for (i = 0; i < count; i++) {
161 if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && 162 if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
@@ -389,18 +390,14 @@ static void ip_nat_q931_expect(struct nf_conn *new,
389 /* Change src to where master sends to */ 390 /* Change src to where master sends to */
390 range.flags = IP_NAT_RANGE_MAP_IPS; 391 range.flags = IP_NAT_RANGE_MAP_IPS;
391 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; 392 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
392 393 nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
393 /* hook doesn't matter, but it has to do source manip */
394 nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
395 394
396 /* For DST manip, map port here to where it's expected. */ 395 /* For DST manip, map port here to where it's expected. */
397 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 396 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
398 range.min = range.max = this->saved_proto; 397 range.min = range.max = this->saved_proto;
399 range.min_ip = range.max_ip = 398 range.min_ip = range.max_ip =
400 new->master->tuplehash[!this->dir].tuple.src.u3.ip; 399 new->master->tuplehash[!this->dir].tuple.src.u3.ip;
401 400 nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
402 /* hook doesn't matter, but it has to do destination manip */
403 nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
404} 401}
405 402
406/****************************************************************************/ 403/****************************************************************************/
@@ -412,7 +409,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
412 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 409 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
413 int dir = CTINFO2DIR(ctinfo); 410 int dir = CTINFO2DIR(ctinfo);
414 u_int16_t nated_port = ntohs(port); 411 u_int16_t nated_port = ntohs(port);
415 union nf_conntrack_address addr; 412 union nf_inet_addr addr;
416 413
417 /* Set expectations for NAT */ 414 /* Set expectations for NAT */
418 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; 415 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
@@ -479,17 +476,13 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
479 /* Change src to where master sends to */ 476 /* Change src to where master sends to */
480 range.flags = IP_NAT_RANGE_MAP_IPS; 477 range.flags = IP_NAT_RANGE_MAP_IPS;
481 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; 478 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
482 479 nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
483 /* hook doesn't matter, but it has to do source manip */
484 nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
485 480
486 /* For DST manip, map port here to where it's expected. */ 481 /* For DST manip, map port here to where it's expected. */
487 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 482 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
488 range.min = range.max = this->saved_proto; 483 range.min = range.max = this->saved_proto;
489 range.min_ip = range.max_ip = this->saved_ip; 484 range.min_ip = range.max_ip = this->saved_ip;
490 485 nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
491 /* hook doesn't matter, but it has to do destination manip */
492 nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
493} 486}
494 487
495/****************************************************************************/ 488/****************************************************************************/
@@ -544,15 +537,15 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
544/****************************************************************************/ 537/****************************************************************************/
545static int __init init(void) 538static int __init init(void)
546{ 539{
547 BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); 540 BUG_ON(set_h245_addr_hook != NULL);
548 BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); 541 BUG_ON(set_h225_addr_hook != NULL);
549 BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); 542 BUG_ON(set_sig_addr_hook != NULL);
550 BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); 543 BUG_ON(set_ras_addr_hook != NULL);
551 BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); 544 BUG_ON(nat_rtp_rtcp_hook != NULL);
552 BUG_ON(rcu_dereference(nat_t120_hook) != NULL); 545 BUG_ON(nat_t120_hook != NULL);
553 BUG_ON(rcu_dereference(nat_h245_hook) != NULL); 546 BUG_ON(nat_h245_hook != NULL);
554 BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); 547 BUG_ON(nat_callforwarding_hook != NULL);
555 BUG_ON(rcu_dereference(nat_q931_hook) != NULL); 548 BUG_ON(nat_q931_hook != NULL);
556 549
557 rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); 550 rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
558 rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); 551 rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 8718da00ef2a..ca57f47bbd25 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -20,6 +20,7 @@
20#include <linux/netfilter_ipv4.h> 20#include <linux/netfilter_ipv4.h>
21#include <net/netfilter/nf_conntrack.h> 21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_helper.h> 22#include <net/netfilter/nf_conntrack_helper.h>
23#include <net/netfilter/nf_conntrack_ecache.h>
23#include <net/netfilter/nf_conntrack_expect.h> 24#include <net/netfilter/nf_conntrack_expect.h>
24#include <net/netfilter/nf_nat.h> 25#include <net/netfilter/nf_nat.h>
25#include <net/netfilter/nf_nat_protocol.h> 26#include <net/netfilter/nf_nat_protocol.h>
@@ -43,8 +44,7 @@ adjust_tcp_sequence(u32 seq,
43 struct nf_nat_seq *this_way, *other_way; 44 struct nf_nat_seq *this_way, *other_way;
44 struct nf_conn_nat *nat = nfct_nat(ct); 45 struct nf_conn_nat *nat = nfct_nat(ct);
45 46
46 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", 47 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq);
47 ntohl(seq), seq);
48 48
49 dir = CTINFO2DIR(ctinfo); 49 dir = CTINFO2DIR(ctinfo);
50 50
@@ -180,8 +180,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
180 datalen, 0)); 180 datalen, 0));
181 } 181 }
182 } else 182 } else
183 nf_proto_csum_replace2(&tcph->check, skb, 183 inet_proto_csum_replace2(&tcph->check, skb,
184 htons(oldlen), htons(datalen), 1); 184 htons(oldlen), htons(datalen), 1);
185 185
186 if (rep_len != match_len) { 186 if (rep_len != match_len) {
187 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); 187 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -191,6 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
191 /* Tell TCP window tracking about seq change */ 191 /* Tell TCP window tracking about seq change */
192 nf_conntrack_tcp_update(skb, ip_hdrlen(skb), 192 nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
193 ct, CTINFO2DIR(ctinfo)); 193 ct, CTINFO2DIR(ctinfo));
194
195 nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
194 } 196 }
195 return 1; 197 return 1;
196} 198}
@@ -270,8 +272,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
270 udph->check = CSUM_MANGLED_0; 272 udph->check = CSUM_MANGLED_0;
271 } 273 }
272 } else 274 } else
273 nf_proto_csum_replace2(&udph->check, skb, 275 inet_proto_csum_replace2(&udph->check, skb,
274 htons(oldlen), htons(datalen), 1); 276 htons(oldlen), htons(datalen), 1);
275 277
276 return 1; 278 return 1;
277} 279}
@@ -310,10 +312,10 @@ sack_adjust(struct sk_buff *skb,
310 ntohl(sack->start_seq), new_start_seq, 312 ntohl(sack->start_seq), new_start_seq,
311 ntohl(sack->end_seq), new_end_seq); 313 ntohl(sack->end_seq), new_end_seq);
312 314
313 nf_proto_csum_replace4(&tcph->check, skb, 315 inet_proto_csum_replace4(&tcph->check, skb,
314 sack->start_seq, new_start_seq, 0); 316 sack->start_seq, new_start_seq, 0);
315 nf_proto_csum_replace4(&tcph->check, skb, 317 inet_proto_csum_replace4(&tcph->check, skb,
316 sack->end_seq, new_end_seq, 0); 318 sack->end_seq, new_end_seq, 0);
317 sack->start_seq = new_start_seq; 319 sack->start_seq = new_start_seq;
318 sack->end_seq = new_end_seq; 320 sack->end_seq = new_end_seq;
319 sackoff += sizeof(*sack); 321 sackoff += sizeof(*sack);
@@ -397,8 +399,8 @@ nf_nat_seq_adjust(struct sk_buff *skb,
397 else 399 else
398 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); 400 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
399 401
400 nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); 402 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
401 nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); 403 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
402 404
403 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", 405 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
404 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), 406 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -430,15 +432,13 @@ void nf_nat_follow_master(struct nf_conn *ct,
430 range.flags = IP_NAT_RANGE_MAP_IPS; 432 range.flags = IP_NAT_RANGE_MAP_IPS;
431 range.min_ip = range.max_ip 433 range.min_ip = range.max_ip
432 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 434 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
433 /* hook doesn't matter, but it has to do source manip */ 435 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
434 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
435 436
436 /* For DST manip, map port here to where it's expected. */ 437 /* For DST manip, map port here to where it's expected. */
437 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 438 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
438 range.min = range.max = exp->saved_proto; 439 range.min = range.max = exp->saved_proto;
439 range.min_ip = range.max_ip 440 range.min_ip = range.max_ip
440 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; 441 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
441 /* hook doesn't matter, but it has to do destination manip */ 442 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
442 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
443} 443}
444EXPORT_SYMBOL(nf_nat_follow_master); 444EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 766e2c16c6b9..fe6f9cef6c85 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -74,7 +74,7 @@ static void __exit nf_nat_irc_fini(void)
74 74
75static int __init nf_nat_irc_init(void) 75static int __init nf_nat_irc_init(void)
76{ 76{
77 BUG_ON(rcu_dereference(nf_nat_irc_hook)); 77 BUG_ON(nf_nat_irc_hook != NULL);
78 rcu_assign_pointer(nf_nat_irc_hook, help); 78 rcu_assign_pointer(nf_nat_irc_hook, help);
79 return 0; 79 return 0;
80} 80}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e1385a099079..3a1e6d6afc0a 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp");
40static void pptp_nat_expected(struct nf_conn *ct, 40static void pptp_nat_expected(struct nf_conn *ct,
41 struct nf_conntrack_expect *exp) 41 struct nf_conntrack_expect *exp)
42{ 42{
43 struct nf_conn *master = ct->master; 43 const struct nf_conn *master = ct->master;
44 struct nf_conntrack_expect *other_exp; 44 struct nf_conntrack_expect *other_exp;
45 struct nf_conntrack_tuple t; 45 struct nf_conntrack_tuple t;
46 struct nf_ct_pptp_master *ct_pptp_info; 46 const struct nf_ct_pptp_master *ct_pptp_info;
47 struct nf_nat_pptp *nat_pptp_info; 47 const struct nf_nat_pptp *nat_pptp_info;
48 struct nf_nat_range range; 48 struct nf_nat_range range;
49 49
50 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; 50 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
@@ -93,8 +93,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
93 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 93 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
94 range.min = range.max = exp->saved_proto; 94 range.min = range.max = exp->saved_proto;
95 } 95 }
96 /* hook doesn't matter, but it has to do source manip */ 96 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
97 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
98 97
99 /* For DST manip, map port here to where it's expected. */ 98 /* For DST manip, map port here to where it's expected. */
100 range.flags = IP_NAT_RANGE_MAP_IPS; 99 range.flags = IP_NAT_RANGE_MAP_IPS;
@@ -104,8 +103,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
104 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 103 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
105 range.min = range.max = exp->saved_proto; 104 range.min = range.max = exp->saved_proto;
106 } 105 }
107 /* hook doesn't matter, but it has to do destination manip */ 106 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
108 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
109} 107}
110 108
111/* outbound packets == from PNS to PAC */ 109/* outbound packets == from PNS to PAC */
@@ -188,7 +186,7 @@ static void
188pptp_exp_gre(struct nf_conntrack_expect *expect_orig, 186pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
189 struct nf_conntrack_expect *expect_reply) 187 struct nf_conntrack_expect *expect_reply)
190{ 188{
191 struct nf_conn *ct = expect_orig->master; 189 const struct nf_conn *ct = expect_orig->master;
192 struct nf_ct_pptp_master *ct_pptp_info; 190 struct nf_ct_pptp_master *ct_pptp_info;
193 struct nf_nat_pptp *nat_pptp_info; 191 struct nf_nat_pptp *nat_pptp_info;
194 192
@@ -219,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
219 struct PptpControlHeader *ctlh, 217 struct PptpControlHeader *ctlh,
220 union pptp_ctrl_union *pptpReq) 218 union pptp_ctrl_union *pptpReq)
221{ 219{
222 struct nf_nat_pptp *nat_pptp_info; 220 const struct nf_nat_pptp *nat_pptp_info;
223 u_int16_t msg; 221 u_int16_t msg;
224 __be16 new_pcid; 222 __be16 new_pcid;
225 unsigned int pcid_off; 223 unsigned int pcid_off;
@@ -281,16 +279,16 @@ static int __init nf_nat_helper_pptp_init(void)
281{ 279{
282 nf_nat_need_gre(); 280 nf_nat_need_gre();
283 281
284 BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); 282 BUG_ON(nf_nat_pptp_hook_outbound != NULL);
285 rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); 283 rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
286 284
287 BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound)); 285 BUG_ON(nf_nat_pptp_hook_inbound != NULL);
288 rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); 286 rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
289 287
290 BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre)); 288 BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
291 rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); 289 rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
292 290
293 BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn)); 291 BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
294 rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); 292 rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
295 return 0; 293 return 0;
296} 294}
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index b820f9960356..a1e4da16da2e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -59,7 +59,7 @@ static int
59gre_unique_tuple(struct nf_conntrack_tuple *tuple, 59gre_unique_tuple(struct nf_conntrack_tuple *tuple,
60 const struct nf_nat_range *range, 60 const struct nf_nat_range *range,
61 enum nf_nat_manip_type maniptype, 61 enum nf_nat_manip_type maniptype,
62 const struct nf_conn *conntrack) 62 const struct nf_conn *ct)
63{ 63{
64 static u_int16_t key; 64 static u_int16_t key;
65 __be16 *keyptr; 65 __be16 *keyptr;
@@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
67 67
68 /* If there is no master conntrack we are not PPTP, 68 /* If there is no master conntrack we are not PPTP,
69 do not change tuples */ 69 do not change tuples */
70 if (!conntrack->master) 70 if (!ct->master)
71 return 0; 71 return 0;
72 72
73 if (maniptype == IP_NAT_MANIP_SRC) 73 if (maniptype == IP_NAT_MANIP_SRC)
@@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
76 keyptr = &tuple->dst.u.gre.key; 76 keyptr = &tuple->dst.u.gre.key;
77 77
78 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 78 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
79 pr_debug("%p: NATing GRE PPTP\n", conntrack); 79 pr_debug("%p: NATing GRE PPTP\n", ct);
80 min = 1; 80 min = 1;
81 range_size = 0xffff; 81 range_size = 0xffff;
82 } else { 82 } else {
@@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
88 88
89 for (i = 0; i < range_size; i++, key++) { 89 for (i = 0; i < range_size; i++, key++) {
90 *keyptr = htons(min + key % range_size); 90 *keyptr = htons(min + key % range_size);
91 if (!nf_nat_used_tuple(tuple, conntrack)) 91 if (!nf_nat_used_tuple(tuple, ct))
92 return 1; 92 return 1;
93 } 93 }
94 94
95 pr_debug("%p: no NAT mapping\n", conntrack); 95 pr_debug("%p: no NAT mapping\n", ct);
96 return 0; 96 return 0;
97} 97}
98 98
@@ -104,7 +104,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
104{ 104{
105 struct gre_hdr *greh; 105 struct gre_hdr *greh;
106 struct gre_hdr_pptp *pgreh; 106 struct gre_hdr_pptp *pgreh;
107 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 107 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
108 unsigned int hdroff = iphdroff + iph->ihl * 4; 108 unsigned int hdroff = iphdroff + iph->ihl * 4;
109 109
110 /* pgreh includes two optional 32bit fields which are not required 110 /* pgreh includes two optional 32bit fields which are not required
@@ -135,9 +135,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
135 return 1; 135 return 1;
136} 136}
137 137
138static struct nf_nat_protocol gre __read_mostly = { 138static const struct nf_nat_protocol gre = {
139 .name = "GRE", 139 .name = "GRE",
140 .protonum = IPPROTO_GRE, 140 .protonum = IPPROTO_GRE,
141 .me = THIS_MODULE,
141 .manip_pkt = gre_manip_pkt, 142 .manip_pkt = gre_manip_pkt,
142 .in_range = gre_in_range, 143 .in_range = gre_in_range,
143 .unique_tuple = gre_unique_tuple, 144 .unique_tuple = gre_unique_tuple,
@@ -147,12 +148,12 @@ static struct nf_nat_protocol gre __read_mostly = {
147#endif 148#endif
148}; 149};
149 150
150int __init nf_nat_proto_gre_init(void) 151static int __init nf_nat_proto_gre_init(void)
151{ 152{
152 return nf_nat_protocol_register(&gre); 153 return nf_nat_protocol_register(&gre);
153} 154}
154 155
155void __exit nf_nat_proto_gre_fini(void) 156static void __exit nf_nat_proto_gre_fini(void)
156{ 157{
157 nf_nat_protocol_unregister(&gre); 158 nf_nat_protocol_unregister(&gre);
158} 159}
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index b9fc724388fc..03a02969aa57 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -57,7 +57,7 @@ icmp_manip_pkt(struct sk_buff *skb,
57 const struct nf_conntrack_tuple *tuple, 57 const struct nf_conntrack_tuple *tuple,
58 enum nf_nat_manip_type maniptype) 58 enum nf_nat_manip_type maniptype)
59{ 59{
60 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 60 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
61 struct icmphdr *hdr; 61 struct icmphdr *hdr;
62 unsigned int hdroff = iphdroff + iph->ihl*4; 62 unsigned int hdroff = iphdroff + iph->ihl*4;
63 63
@@ -65,13 +65,13 @@ icmp_manip_pkt(struct sk_buff *skb,
65 return 0; 65 return 0;
66 66
67 hdr = (struct icmphdr *)(skb->data + hdroff); 67 hdr = (struct icmphdr *)(skb->data + hdroff);
68 nf_proto_csum_replace2(&hdr->checksum, skb, 68 inet_proto_csum_replace2(&hdr->checksum, skb,
69 hdr->un.echo.id, tuple->src.u.icmp.id, 0); 69 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
70 hdr->un.echo.id = tuple->src.u.icmp.id; 70 hdr->un.echo.id = tuple->src.u.icmp.id;
71 return 1; 71 return 1;
72} 72}
73 73
74struct nf_nat_protocol nf_nat_protocol_icmp = { 74const struct nf_nat_protocol nf_nat_protocol_icmp = {
75 .name = "ICMP", 75 .name = "ICMP",
76 .protonum = IPPROTO_ICMP, 76 .protonum = IPPROTO_ICMP,
77 .me = THIS_MODULE, 77 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 6bab2e184455..ffd5d1589eca 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb,
93 const struct nf_conntrack_tuple *tuple, 93 const struct nf_conntrack_tuple *tuple,
94 enum nf_nat_manip_type maniptype) 94 enum nf_nat_manip_type maniptype)
95{ 95{
96 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 96 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
97 struct tcphdr *hdr; 97 struct tcphdr *hdr;
98 unsigned int hdroff = iphdroff + iph->ihl*4; 98 unsigned int hdroff = iphdroff + iph->ihl*4;
99 __be32 oldip, newip; 99 __be32 oldip, newip;
@@ -132,12 +132,12 @@ tcp_manip_pkt(struct sk_buff *skb,
132 if (hdrsize < sizeof(*hdr)) 132 if (hdrsize < sizeof(*hdr))
133 return 1; 133 return 1;
134 134
135 nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); 135 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
136 nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); 136 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
137 return 1; 137 return 1;
138} 138}
139 139
140struct nf_nat_protocol nf_nat_protocol_tcp = { 140const struct nf_nat_protocol nf_nat_protocol_tcp = {
141 .name = "TCP", 141 .name = "TCP",
142 .protonum = IPPROTO_TCP, 142 .protonum = IPPROTO_TCP,
143 .me = THIS_MODULE, 143 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index cbf1a61e2908..4b8f49910ff2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -91,7 +91,7 @@ udp_manip_pkt(struct sk_buff *skb,
91 const struct nf_conntrack_tuple *tuple, 91 const struct nf_conntrack_tuple *tuple,
92 enum nf_nat_manip_type maniptype) 92 enum nf_nat_manip_type maniptype)
93{ 93{
94 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 94 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
95 struct udphdr *hdr; 95 struct udphdr *hdr;
96 unsigned int hdroff = iphdroff + iph->ihl*4; 96 unsigned int hdroff = iphdroff + iph->ihl*4;
97 __be32 oldip, newip; 97 __be32 oldip, newip;
@@ -117,9 +117,9 @@ udp_manip_pkt(struct sk_buff *skb,
117 portptr = &hdr->dest; 117 portptr = &hdr->dest;
118 } 118 }
119 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { 119 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
120 nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); 120 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
121 nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 121 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
122 0); 122 0);
123 if (!hdr->check) 123 if (!hdr->check)
124 hdr->check = CSUM_MANGLED_0; 124 hdr->check = CSUM_MANGLED_0;
125 } 125 }
@@ -127,7 +127,7 @@ udp_manip_pkt(struct sk_buff *skb,
127 return 1; 127 return 1;
128} 128}
129 129
130struct nf_nat_protocol nf_nat_protocol_udp = { 130const struct nf_nat_protocol nf_nat_protocol_udp = {
131 .name = "UDP", 131 .name = "UDP",
132 .protonum = IPPROTO_UDP, 132 .protonum = IPPROTO_UDP,
133 .me = THIS_MODULE, 133 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index cfd2742e9706..a26efeb073cb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -45,7 +45,7 @@ unknown_manip_pkt(struct sk_buff *skb,
45 return 1; 45 return 1;
46} 46}
47 47
48struct nf_nat_protocol nf_nat_unknown_protocol = { 48const struct nf_nat_protocol nf_nat_unknown_protocol = {
49 .name = "unknown", 49 .name = "unknown",
50 /* .me isn't set: getting a ref to this cannot fail. */ 50 /* .me isn't set: getting a ref to this cannot fail. */
51 .manip_pkt = unknown_manip_pkt, 51 .manip_pkt = unknown_manip_pkt,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 46b25ab5f78b..f8fda57ba20b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -24,7 +24,9 @@
24#include <net/netfilter/nf_nat_core.h> 24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_nat_rule.h> 25#include <net/netfilter/nf_nat_rule.h>
26 26
27#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) 27#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
28 (1 << NF_INET_POST_ROUTING) | \
29 (1 << NF_INET_LOCAL_OUT))
28 30
29static struct 31static struct
30{ 32{
@@ -38,14 +40,14 @@ static struct
38 .num_entries = 4, 40 .num_entries = 4,
39 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), 41 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
40 .hook_entry = { 42 .hook_entry = {
41 [NF_IP_PRE_ROUTING] = 0, 43 [NF_INET_PRE_ROUTING] = 0,
42 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), 44 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
43 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 45 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
44 }, 46 },
45 .underflow = { 47 .underflow = {
46 [NF_IP_PRE_ROUTING] = 0, 48 [NF_INET_PRE_ROUTING] = 0,
47 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), 49 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
48 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 50 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
49 }, 51 },
50 }, 52 },
51 .entries = { 53 .entries = {
@@ -56,13 +58,14 @@ static struct
56 .term = IPT_ERROR_INIT, /* ERROR */ 58 .term = IPT_ERROR_INIT, /* ERROR */
57}; 59};
58 60
59static struct xt_table nat_table = { 61static struct xt_table __nat_table = {
60 .name = "nat", 62 .name = "nat",
61 .valid_hooks = NAT_VALID_HOOKS, 63 .valid_hooks = NAT_VALID_HOOKS,
62 .lock = RW_LOCK_UNLOCKED, 64 .lock = RW_LOCK_UNLOCKED,
63 .me = THIS_MODULE, 65 .me = THIS_MODULE,
64 .af = AF_INET, 66 .af = AF_INET,
65}; 67};
68static struct xt_table *nat_table;
66 69
67/* Source NAT */ 70/* Source NAT */
68static unsigned int ipt_snat_target(struct sk_buff *skb, 71static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -76,7 +79,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
76 enum ip_conntrack_info ctinfo; 79 enum ip_conntrack_info ctinfo;
77 const struct nf_nat_multi_range_compat *mr = targinfo; 80 const struct nf_nat_multi_range_compat *mr = targinfo;
78 81
79 NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); 82 NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
80 83
81 ct = nf_ct_get(skb, &ctinfo); 84 ct = nf_ct_get(skb, &ctinfo);
82 85
@@ -85,7 +88,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
85 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); 88 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
86 NF_CT_ASSERT(out); 89 NF_CT_ASSERT(out);
87 90
88 return nf_nat_setup_info(ct, &mr->range[0], hooknum); 91 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
89} 92}
90 93
91/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ 94/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
@@ -95,7 +98,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
95 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; 98 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
96 struct rtable *rt; 99 struct rtable *rt;
97 100
98 if (ip_route_output_key(&rt, &fl) != 0) 101 if (ip_route_output_key(&init_net, &rt, &fl) != 0)
99 return; 102 return;
100 103
101 if (rt->rt_src != srcip && !warned) { 104 if (rt->rt_src != srcip && !warned) {
@@ -118,20 +121,20 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
118 enum ip_conntrack_info ctinfo; 121 enum ip_conntrack_info ctinfo;
119 const struct nf_nat_multi_range_compat *mr = targinfo; 122 const struct nf_nat_multi_range_compat *mr = targinfo;
120 123
121 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || 124 NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
122 hooknum == NF_IP_LOCAL_OUT); 125 hooknum == NF_INET_LOCAL_OUT);
123 126
124 ct = nf_ct_get(skb, &ctinfo); 127 ct = nf_ct_get(skb, &ctinfo);
125 128
126 /* Connection must be valid and new. */ 129 /* Connection must be valid and new. */
127 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); 130 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
128 131
129 if (hooknum == NF_IP_LOCAL_OUT && 132 if (hooknum == NF_INET_LOCAL_OUT &&
130 mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) 133 mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
131 warn_if_extra_mangle(ip_hdr(skb)->daddr, 134 warn_if_extra_mangle(ip_hdr(skb)->daddr,
132 mr->range[0].min_ip); 135 mr->range[0].min_ip);
133 136
134 return nf_nat_setup_info(ct, &mr->range[0], hooknum); 137 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
135} 138}
136 139
137static bool ipt_snat_checkentry(const char *tablename, 140static bool ipt_snat_checkentry(const char *tablename,
@@ -182,7 +185,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
182 185
183 pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n", 186 pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
184 ct, NIPQUAD(ip)); 187 ct, NIPQUAD(ip));
185 return nf_nat_setup_info(ct, &range, hooknum); 188 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
186} 189}
187 190
188unsigned int 191unsigned int
@@ -201,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
201 204
202 pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", 205 pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
203 ct, NIPQUAD(ip)); 206 ct, NIPQUAD(ip));
204 return nf_nat_setup_info(ct, &range, hooknum); 207 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
205} 208}
206 209
207int nf_nat_rule_find(struct sk_buff *skb, 210int nf_nat_rule_find(struct sk_buff *skb,
@@ -212,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
212{ 215{
213 int ret; 216 int ret;
214 217
215 ret = ipt_do_table(skb, hooknum, in, out, &nat_table); 218 ret = ipt_do_table(skb, hooknum, in, out, nat_table);
216 219
217 if (ret == NF_ACCEPT) { 220 if (ret == NF_ACCEPT) {
218 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) 221 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -227,7 +230,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
227 .target = ipt_snat_target, 230 .target = ipt_snat_target,
228 .targetsize = sizeof(struct nf_nat_multi_range_compat), 231 .targetsize = sizeof(struct nf_nat_multi_range_compat),
229 .table = "nat", 232 .table = "nat",
230 .hooks = 1 << NF_IP_POST_ROUTING, 233 .hooks = 1 << NF_INET_POST_ROUTING,
231 .checkentry = ipt_snat_checkentry, 234 .checkentry = ipt_snat_checkentry,
232 .family = AF_INET, 235 .family = AF_INET,
233}; 236};
@@ -237,7 +240,7 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
237 .target = ipt_dnat_target, 240 .target = ipt_dnat_target,
238 .targetsize = sizeof(struct nf_nat_multi_range_compat), 241 .targetsize = sizeof(struct nf_nat_multi_range_compat),
239 .table = "nat", 242 .table = "nat",
240 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), 243 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
241 .checkentry = ipt_dnat_checkentry, 244 .checkentry = ipt_dnat_checkentry,
242 .family = AF_INET, 245 .family = AF_INET,
243}; 246};
@@ -246,9 +249,10 @@ int __init nf_nat_rule_init(void)
246{ 249{
247 int ret; 250 int ret;
248 251
249 ret = ipt_register_table(&nat_table, &nat_initial_table.repl); 252 nat_table = ipt_register_table(&init_net, &__nat_table,
250 if (ret != 0) 253 &nat_initial_table.repl);
251 return ret; 254 if (IS_ERR(nat_table))
255 return PTR_ERR(nat_table);
252 ret = xt_register_target(&ipt_snat_reg); 256 ret = xt_register_target(&ipt_snat_reg);
253 if (ret != 0) 257 if (ret != 0)
254 goto unregister_table; 258 goto unregister_table;
@@ -262,7 +266,7 @@ int __init nf_nat_rule_init(void)
262 unregister_snat: 266 unregister_snat:
263 xt_unregister_target(&ipt_snat_reg); 267 xt_unregister_target(&ipt_snat_reg);
264 unregister_table: 268 unregister_table:
265 ipt_unregister_table(&nat_table); 269 ipt_unregister_table(nat_table);
266 270
267 return ret; 271 return ret;
268} 272}
@@ -271,5 +275,5 @@ void nf_nat_rule_cleanup(void)
271{ 275{
272 xt_unregister_target(&ipt_dnat_reg); 276 xt_unregister_target(&ipt_dnat_reg);
273 xt_unregister_target(&ipt_snat_reg); 277 xt_unregister_target(&ipt_snat_reg);
274 ipt_unregister_table(&nat_table); 278 ipt_unregister_table(nat_table);
275} 279}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ce9edbcc01e3..b4c8d4968bb2 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -35,9 +35,9 @@ struct addr_map {
35 } addr[IP_CT_DIR_MAX]; 35 } addr[IP_CT_DIR_MAX];
36}; 36};
37 37
38static void addr_map_init(struct nf_conn *ct, struct addr_map *map) 38static void addr_map_init(const struct nf_conn *ct, struct addr_map *map)
39{ 39{
40 struct nf_conntrack_tuple *t; 40 const struct nf_conntrack_tuple *t;
41 enum ip_conntrack_dir dir; 41 enum ip_conntrack_dir dir;
42 unsigned int n; 42 unsigned int n;
43 43
@@ -165,7 +165,7 @@ static int mangle_content_len(struct sk_buff *skb,
165 165
166 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 166 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
167 167
168 /* Get actual SDP lenght */ 168 /* Get actual SDP length */
169 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, 169 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
170 &matchlen, POS_SDP_HEADER) > 0) { 170 &matchlen, POS_SDP_HEADER) > 0) {
171 171
@@ -228,15 +228,13 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
228 range.flags = IP_NAT_RANGE_MAP_IPS; 228 range.flags = IP_NAT_RANGE_MAP_IPS;
229 range.min_ip = range.max_ip 229 range.min_ip = range.max_ip
230 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 230 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
231 /* hook doesn't matter, but it has to do source manip */ 231 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
232 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
233 232
234 /* For DST manip, map port here to where it's expected. */ 233 /* For DST manip, map port here to where it's expected. */
235 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 234 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
236 range.min = range.max = exp->saved_proto; 235 range.min = range.max = exp->saved_proto;
237 range.min_ip = range.max_ip = exp->saved_ip; 236 range.min_ip = range.max_ip = exp->saved_ip;
238 /* hook doesn't matter, but it has to do destination manip */ 237 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
239 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
240} 238}
241 239
242/* So, this packet has hit the connection tracking matching code. 240/* So, this packet has hit the connection tracking matching code.
@@ -293,8 +291,8 @@ static void __exit nf_nat_sip_fini(void)
293 291
294static int __init nf_nat_sip_init(void) 292static int __init nf_nat_sip_init(void)
295{ 293{
296 BUG_ON(rcu_dereference(nf_nat_sip_hook)); 294 BUG_ON(nf_nat_sip_hook != NULL);
297 BUG_ON(rcu_dereference(nf_nat_sdp_hook)); 295 BUG_ON(nf_nat_sdp_hook != NULL);
298 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); 296 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
299 rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); 297 rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp);
300 return 0; 298 return 0;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 03709d6b4b06..540ce6ae887c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -60,7 +60,7 @@ MODULE_ALIAS("ip_nat_snmp_basic");
60 60
61#define SNMP_PORT 161 61#define SNMP_PORT 161
62#define SNMP_TRAP_PORT 162 62#define SNMP_TRAP_PORT 162
63#define NOCT1(n) (*(u8 *)n) 63#define NOCT1(n) (*(u8 *)(n))
64 64
65static int debug; 65static int debug;
66static DEFINE_SPINLOCK(snmp_lock); 66static DEFINE_SPINLOCK(snmp_lock);
@@ -260,7 +260,7 @@ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
260{ 260{
261 unsigned char ch; 261 unsigned char ch;
262 262
263 if (eoc == 0) { 263 if (eoc == NULL) {
264 if (!asn1_octet_decode(ctx, &ch)) 264 if (!asn1_octet_decode(ctx, &ch))
265 return 0; 265 return 0;
266 266
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7db76ea9af91..99b2c788d5a8 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
137 if (unlikely(nf_ct_is_confirmed(ct))) 137 if (unlikely(nf_ct_is_confirmed(ct)))
138 /* NAT module was loaded late */ 138 /* NAT module was loaded late */
139 ret = alloc_null_binding_confirmed(ct, hooknum); 139 ret = alloc_null_binding_confirmed(ct, hooknum);
140 else if (hooknum == NF_IP_LOCAL_IN) 140 else if (hooknum == NF_INET_LOCAL_IN)
141 /* LOCAL_IN hook doesn't have a chain! */ 141 /* LOCAL_IN hook doesn't have a chain! */
142 ret = alloc_null_binding(ct, hooknum); 142 ret = alloc_null_binding(ct, hooknum);
143 else 143 else
@@ -273,13 +273,13 @@ nf_nat_adjust(unsigned int hooknum,
273 273
274/* We must be after connection tracking and before packet filtering. */ 274/* We must be after connection tracking and before packet filtering. */
275 275
276static struct nf_hook_ops nf_nat_ops[] = { 276static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
277 /* Before packet filtering, change destination */ 277 /* Before packet filtering, change destination */
278 { 278 {
279 .hook = nf_nat_in, 279 .hook = nf_nat_in,
280 .owner = THIS_MODULE, 280 .owner = THIS_MODULE,
281 .pf = PF_INET, 281 .pf = PF_INET,
282 .hooknum = NF_IP_PRE_ROUTING, 282 .hooknum = NF_INET_PRE_ROUTING,
283 .priority = NF_IP_PRI_NAT_DST, 283 .priority = NF_IP_PRI_NAT_DST,
284 }, 284 },
285 /* After packet filtering, change source */ 285 /* After packet filtering, change source */
@@ -287,7 +287,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
287 .hook = nf_nat_out, 287 .hook = nf_nat_out,
288 .owner = THIS_MODULE, 288 .owner = THIS_MODULE,
289 .pf = PF_INET, 289 .pf = PF_INET,
290 .hooknum = NF_IP_POST_ROUTING, 290 .hooknum = NF_INET_POST_ROUTING,
291 .priority = NF_IP_PRI_NAT_SRC, 291 .priority = NF_IP_PRI_NAT_SRC,
292 }, 292 },
293 /* After conntrack, adjust sequence number */ 293 /* After conntrack, adjust sequence number */
@@ -295,7 +295,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
295 .hook = nf_nat_adjust, 295 .hook = nf_nat_adjust,
296 .owner = THIS_MODULE, 296 .owner = THIS_MODULE,
297 .pf = PF_INET, 297 .pf = PF_INET,
298 .hooknum = NF_IP_POST_ROUTING, 298 .hooknum = NF_INET_POST_ROUTING,
299 .priority = NF_IP_PRI_NAT_SEQ_ADJUST, 299 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
300 }, 300 },
301 /* Before packet filtering, change destination */ 301 /* Before packet filtering, change destination */
@@ -303,7 +303,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
303 .hook = nf_nat_local_fn, 303 .hook = nf_nat_local_fn,
304 .owner = THIS_MODULE, 304 .owner = THIS_MODULE,
305 .pf = PF_INET, 305 .pf = PF_INET,
306 .hooknum = NF_IP_LOCAL_OUT, 306 .hooknum = NF_INET_LOCAL_OUT,
307 .priority = NF_IP_PRI_NAT_DST, 307 .priority = NF_IP_PRI_NAT_DST,
308 }, 308 },
309 /* After packet filtering, change source */ 309 /* After packet filtering, change source */
@@ -311,7 +311,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
311 .hook = nf_nat_fn, 311 .hook = nf_nat_fn,
312 .owner = THIS_MODULE, 312 .owner = THIS_MODULE,
313 .pf = PF_INET, 313 .pf = PF_INET,
314 .hooknum = NF_IP_LOCAL_IN, 314 .hooknum = NF_INET_LOCAL_IN,
315 .priority = NF_IP_PRI_NAT_SRC, 315 .priority = NF_IP_PRI_NAT_SRC,
316 }, 316 },
317 /* After conntrack, adjust sequence number */ 317 /* After conntrack, adjust sequence number */
@@ -319,7 +319,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
319 .hook = nf_nat_adjust, 319 .hook = nf_nat_adjust,
320 .owner = THIS_MODULE, 320 .owner = THIS_MODULE,
321 .pf = PF_INET, 321 .pf = PF_INET,
322 .hooknum = NF_IP_LOCAL_IN, 322 .hooknum = NF_INET_LOCAL_IN,
323 .priority = NF_IP_PRI_NAT_SEQ_ADJUST, 323 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
324 }, 324 },
325}; 325};
@@ -332,7 +332,7 @@ static int __init nf_nat_standalone_init(void)
332 332
333#ifdef CONFIG_XFRM 333#ifdef CONFIG_XFRM
334 BUG_ON(ip_nat_decode_session != NULL); 334 BUG_ON(ip_nat_decode_session != NULL);
335 ip_nat_decode_session = nat_decode_session; 335 rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
336#endif 336#endif
337 ret = nf_nat_rule_init(); 337 ret = nf_nat_rule_init();
338 if (ret < 0) { 338 if (ret < 0) {
@@ -350,7 +350,7 @@ static int __init nf_nat_standalone_init(void)
350 nf_nat_rule_cleanup(); 350 nf_nat_rule_cleanup();
351 cleanup_decode_session: 351 cleanup_decode_session:
352#ifdef CONFIG_XFRM 352#ifdef CONFIG_XFRM
353 ip_nat_decode_session = NULL; 353 rcu_assign_pointer(ip_nat_decode_session, NULL);
354 synchronize_net(); 354 synchronize_net();
355#endif 355#endif
356 return ret; 356 return ret;
@@ -361,7 +361,7 @@ static void __exit nf_nat_standalone_fini(void)
361 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); 361 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
362 nf_nat_rule_cleanup(); 362 nf_nat_rule_cleanup();
363#ifdef CONFIG_XFRM 363#ifdef CONFIG_XFRM
364 ip_nat_decode_session = NULL; 364 rcu_assign_pointer(ip_nat_decode_session, NULL);
365 synchronize_net(); 365 synchronize_net();
366#endif 366#endif
367 /* Conntrack caches are unregistered in nf_conntrack_cleanup */ 367 /* Conntrack caches are unregistered in nf_conntrack_cleanup */
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 0ecec701cb44..b096e81500ae 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -24,7 +24,7 @@ static unsigned int help(struct sk_buff *skb,
24 enum ip_conntrack_info ctinfo, 24 enum ip_conntrack_info ctinfo,
25 struct nf_conntrack_expect *exp) 25 struct nf_conntrack_expect *exp)
26{ 26{
27 struct nf_conn *ct = exp->master; 27 const struct nf_conn *ct = exp->master;
28 28
29 exp->saved_proto.udp.port 29 exp->saved_proto.udp.port
30 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; 30 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
@@ -43,7 +43,7 @@ static void __exit nf_nat_tftp_fini(void)
43 43
44static int __init nf_nat_tftp_init(void) 44static int __init nf_nat_tftp_init(void)
45{ 45{
46 BUG_ON(rcu_dereference(nf_nat_tftp_hook)); 46 BUG_ON(nf_nat_tftp_hook != NULL);
47 rcu_assign_pointer(nf_nat_tftp_hook, help); 47 rcu_assign_pointer(nf_nat_tftp_hook, help);
48 return 0; 48 return 0;
49} 49}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9be0daa9c0ec..d63474c6b400 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,17 +46,6 @@
46#include <net/sock.h> 46#include <net/sock.h>
47#include <net/raw.h> 47#include <net/raw.h>
48 48
49static int fold_prot_inuse(struct proto *proto)
50{
51 int res = 0;
52 int cpu;
53
54 for_each_possible_cpu(cpu)
55 res += proto->stats[cpu].inuse;
56
57 return res;
58}
59
60/* 49/*
61 * Report socket allocation statistics [mea@utu.fi] 50 * Report socket allocation statistics [mea@utu.fi]
62 */ 51 */
@@ -64,14 +53,16 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
64{ 53{
65 socket_seq_show(seq); 54 socket_seq_show(seq);
66 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
67 fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), 56 sock_prot_inuse_get(&tcp_prot),
57 atomic_read(&tcp_orphan_count),
68 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), 58 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
69 atomic_read(&tcp_memory_allocated)); 59 atomic_read(&tcp_memory_allocated));
70 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); 60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
71 seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); 61 atomic_read(&udp_memory_allocated));
72 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); 62 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
63 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
73 seq_printf(seq, "FRAG: inuse %d memory %d\n", 64 seq_printf(seq, "FRAG: inuse %d memory %d\n",
74 ip_frag_nqueues(), ip_frag_mem()); 65 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
75 return 0; 66 return 0;
76} 67}
77 68
@@ -304,7 +295,7 @@ static void icmp_put(struct seq_file *seq)
304 for (i=0; icmpmibmap[i].name != NULL; i++) 295 for (i=0; icmpmibmap[i].name != NULL; i++)
305 seq_printf(seq, " %lu", 296 seq_printf(seq, " %lu",
306 snmp_fold_field((void **) icmpmsg_statistics, 297 snmp_fold_field((void **) icmpmsg_statistics,
307 icmpmibmap[i].index)); 298 icmpmibmap[i].index | 0x100));
308} 299}
309 300
310/* 301/*
@@ -320,7 +311,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
320 seq_printf(seq, " %s", snmp4_ipstats_list[i].name); 311 seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
321 312
322 seq_printf(seq, "\nIp: %d %d", 313 seq_printf(seq, "\nIp: %d %d",
323 IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); 314 IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
315 sysctl_ip_default_ttl);
324 316
325 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 317 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
326 seq_printf(seq, " %lu", 318 seq_printf(seq, " %lu",
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3916faca3afe..a3002fe65b7f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -80,38 +80,51 @@
80#include <linux/netfilter.h> 80#include <linux/netfilter.h>
81#include <linux/netfilter_ipv4.h> 81#include <linux/netfilter_ipv4.h>
82 82
83struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; 83static struct raw_hashinfo raw_v4_hashinfo = {
84DEFINE_RWLOCK(raw_v4_lock); 84 .lock = __RW_LOCK_UNLOCKED(),
85};
85 86
86static void raw_v4_hash(struct sock *sk) 87void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
87{ 88{
88 struct hlist_head *head = &raw_v4_htable[inet_sk(sk)->num & 89 struct hlist_head *head;
89 (RAWV4_HTABLE_SIZE - 1)]; 90
91 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
90 92
91 write_lock_bh(&raw_v4_lock); 93 write_lock_bh(&h->lock);
92 sk_add_node(sk, head); 94 sk_add_node(sk, head);
93 sock_prot_inc_use(sk->sk_prot); 95 sock_prot_inuse_add(sk->sk_prot, 1);
94 write_unlock_bh(&raw_v4_lock); 96 write_unlock_bh(&h->lock);
95} 97}
98EXPORT_SYMBOL_GPL(raw_hash_sk);
96 99
97static void raw_v4_unhash(struct sock *sk) 100void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h)
98{ 101{
99 write_lock_bh(&raw_v4_lock); 102 write_lock_bh(&h->lock);
100 if (sk_del_node_init(sk)) 103 if (sk_del_node_init(sk))
101 sock_prot_dec_use(sk->sk_prot); 104 sock_prot_inuse_add(sk->sk_prot, -1);
102 write_unlock_bh(&raw_v4_lock); 105 write_unlock_bh(&h->lock);
103} 106}
107EXPORT_SYMBOL_GPL(raw_unhash_sk);
104 108
105struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, 109static void raw_v4_hash(struct sock *sk)
106 __be32 raddr, __be32 laddr, 110{
107 int dif) 111 raw_hash_sk(sk, &raw_v4_hashinfo);
112}
113
114static void raw_v4_unhash(struct sock *sk)
115{
116 raw_unhash_sk(sk, &raw_v4_hashinfo);
117}
118
119static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
120 unsigned short num, __be32 raddr, __be32 laddr, int dif)
108{ 121{
109 struct hlist_node *node; 122 struct hlist_node *node;
110 123
111 sk_for_each_from(sk, node) { 124 sk_for_each_from(sk, node) {
112 struct inet_sock *inet = inet_sk(sk); 125 struct inet_sock *inet = inet_sk(sk);
113 126
114 if (inet->num == num && 127 if (sk->sk_net == net && inet->num == num &&
115 !(inet->daddr && inet->daddr != raddr) && 128 !(inet->daddr && inet->daddr != raddr) &&
116 !(inet->rcv_saddr && inet->rcv_saddr != laddr) && 129 !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
117 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 130 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
@@ -150,17 +163,20 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
150 * RFC 1122: SHOULD pass TOS value up to the transport layer. 163 * RFC 1122: SHOULD pass TOS value up to the transport layer.
151 * -> It does. And not only TOS, but all IP header. 164 * -> It does. And not only TOS, but all IP header.
152 */ 165 */
153int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) 166static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
154{ 167{
155 struct sock *sk; 168 struct sock *sk;
156 struct hlist_head *head; 169 struct hlist_head *head;
157 int delivered = 0; 170 int delivered = 0;
171 struct net *net;
158 172
159 read_lock(&raw_v4_lock); 173 read_lock(&raw_v4_hashinfo.lock);
160 head = &raw_v4_htable[hash]; 174 head = &raw_v4_hashinfo.ht[hash];
161 if (hlist_empty(head)) 175 if (hlist_empty(head))
162 goto out; 176 goto out;
163 sk = __raw_v4_lookup(__sk_head(head), iph->protocol, 177
178 net = skb->dev->nd_net;
179 sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
164 iph->saddr, iph->daddr, 180 iph->saddr, iph->daddr,
165 skb->dev->ifindex); 181 skb->dev->ifindex);
166 182
@@ -173,16 +189,34 @@ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
173 if (clone) 189 if (clone)
174 raw_rcv(sk, clone); 190 raw_rcv(sk, clone);
175 } 191 }
176 sk = __raw_v4_lookup(sk_next(sk), iph->protocol, 192 sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
177 iph->saddr, iph->daddr, 193 iph->saddr, iph->daddr,
178 skb->dev->ifindex); 194 skb->dev->ifindex);
179 } 195 }
180out: 196out:
181 read_unlock(&raw_v4_lock); 197 read_unlock(&raw_v4_hashinfo.lock);
182 return delivered; 198 return delivered;
183} 199}
184 200
185void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) 201int raw_local_deliver(struct sk_buff *skb, int protocol)
202{
203 int hash;
204 struct sock *raw_sk;
205
206 hash = protocol & (RAW_HTABLE_SIZE - 1);
207 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
208
209 /* If there maybe a raw socket we must check - if not we
210 * don't care less
211 */
212 if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
213 raw_sk = NULL;
214
215 return raw_sk != NULL;
216
217}
218
219static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
186{ 220{
187 struct inet_sock *inet = inet_sk(sk); 221 struct inet_sock *inet = inet_sk(sk);
188 const int type = icmp_hdr(skb)->type; 222 const int type = icmp_hdr(skb)->type;
@@ -236,12 +270,38 @@ void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
236 } 270 }
237} 271}
238 272
273void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
274{
275 int hash;
276 struct sock *raw_sk;
277 struct iphdr *iph;
278 struct net *net;
279
280 hash = protocol & (RAW_HTABLE_SIZE - 1);
281
282 read_lock(&raw_v4_hashinfo.lock);
283 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
284 if (raw_sk != NULL) {
285 iph = (struct iphdr *)skb->data;
286 net = skb->dev->nd_net;
287
288 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
289 iph->daddr, iph->saddr,
290 skb->dev->ifindex)) != NULL) {
291 raw_err(raw_sk, skb, info);
292 raw_sk = sk_next(raw_sk);
293 iph = (struct iphdr *)skb->data;
294 }
295 }
296 read_unlock(&raw_v4_hashinfo.lock);
297}
298
239static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) 299static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
240{ 300{
241 /* Charge it to the socket. */ 301 /* Charge it to the socket. */
242 302
243 if (sock_queue_rcv_skb(sk, skb) < 0) { 303 if (sock_queue_rcv_skb(sk, skb) < 0) {
244 /* FIXME: increment a raw drops counter here */ 304 atomic_inc(&sk->sk_drops);
245 kfree_skb(skb); 305 kfree_skb(skb);
246 return NET_RX_DROP; 306 return NET_RX_DROP;
247 } 307 }
@@ -252,6 +312,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
252int raw_rcv(struct sock *sk, struct sk_buff *skb) 312int raw_rcv(struct sock *sk, struct sk_buff *skb)
253{ 313{
254 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 314 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
315 atomic_inc(&sk->sk_drops);
255 kfree_skb(skb); 316 kfree_skb(skb);
256 return NET_RX_DROP; 317 return NET_RX_DROP;
257 } 318 }
@@ -271,6 +332,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
271 int hh_len; 332 int hh_len;
272 struct iphdr *iph; 333 struct iphdr *iph;
273 struct sk_buff *skb; 334 struct sk_buff *skb;
335 unsigned int iphlen;
274 int err; 336 int err;
275 337
276 if (length > rt->u.dst.dev->mtu) { 338 if (length > rt->u.dst.dev->mtu) {
@@ -290,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
290 skb_reserve(skb, hh_len); 352 skb_reserve(skb, hh_len);
291 353
292 skb->priority = sk->sk_priority; 354 skb->priority = sk->sk_priority;
355 skb->mark = sk->sk_mark;
293 skb->dst = dst_clone(&rt->u.dst); 356 skb->dst = dst_clone(&rt->u.dst);
294 357
295 skb_reset_network_header(skb); 358 skb_reset_network_header(skb);
@@ -304,7 +367,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
304 goto error_fault; 367 goto error_fault;
305 368
306 /* We don't modify invalid header */ 369 /* We don't modify invalid header */
307 if (length >= sizeof(*iph) && iph->ihl * 4U <= length) { 370 iphlen = iph->ihl * 4;
371 if (iphlen >= sizeof(*iph) && iphlen <= length) {
308 if (!iph->saddr) 372 if (!iph->saddr)
309 iph->saddr = rt->rt_src; 373 iph->saddr = rt->rt_src;
310 iph->check = 0; 374 iph->check = 0;
@@ -318,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
318 icmp_out_count(((struct icmphdr *) 382 icmp_out_count(((struct icmphdr *)
319 skb_transport_header(skb))->type); 383 skb_transport_header(skb))->type);
320 384
321 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 385 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
322 dst_output); 386 dst_output);
323 if (err > 0) 387 if (err > 0)
324 err = inet->recverr ? net_xmit_errno(err) : 0; 388 err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -472,7 +536,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
472 if (msg->msg_flags & MSG_DONTROUTE) 536 if (msg->msg_flags & MSG_DONTROUTE)
473 tos |= RTO_ONLINK; 537 tos |= RTO_ONLINK;
474 538
475 if (MULTICAST(daddr)) { 539 if (ipv4_is_multicast(daddr)) {
476 if (!ipc.oif) 540 if (!ipc.oif)
477 ipc.oif = inet->mc_index; 541 ipc.oif = inet->mc_index;
478 if (!saddr) 542 if (!saddr)
@@ -481,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
481 545
482 { 546 {
483 struct flowi fl = { .oif = ipc.oif, 547 struct flowi fl = { .oif = ipc.oif,
548 .mark = sk->sk_mark,
484 .nl_u = { .ip4_u = 549 .nl_u = { .ip4_u =
485 { .daddr = daddr, 550 { .daddr = daddr,
486 .saddr = saddr, 551 .saddr = saddr,
@@ -495,7 +560,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
495 } 560 }
496 561
497 security_sk_classify_flow(sk, &fl); 562 security_sk_classify_flow(sk, &fl);
498 err = ip_route_output_flow(&rt, &fl, sk, 1); 563 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
499 } 564 }
500 if (err) 565 if (err)
501 goto done; 566 goto done;
@@ -562,7 +627,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
562 627
563 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) 628 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
564 goto out; 629 goto out;
565 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); 630 chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr);
566 ret = -EADDRNOTAVAIL; 631 ret = -EADDRNOTAVAIL;
567 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 632 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
568 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 633 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -760,6 +825,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
760 } 825 }
761} 826}
762 827
828DEFINE_PROTO_INUSE(raw)
829
763struct proto raw_prot = { 830struct proto raw_prot = {
764 .name = "RAW", 831 .name = "RAW",
765 .owner = THIS_MODULE, 832 .owner = THIS_MODULE,
@@ -781,25 +848,21 @@ struct proto raw_prot = {
781 .compat_setsockopt = compat_raw_setsockopt, 848 .compat_setsockopt = compat_raw_setsockopt,
782 .compat_getsockopt = compat_raw_getsockopt, 849 .compat_getsockopt = compat_raw_getsockopt,
783#endif 850#endif
851 REF_PROTO_INUSE(raw)
784}; 852};
785 853
786#ifdef CONFIG_PROC_FS 854#ifdef CONFIG_PROC_FS
787struct raw_iter_state {
788 int bucket;
789};
790
791#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private)
792
793static struct sock *raw_get_first(struct seq_file *seq) 855static struct sock *raw_get_first(struct seq_file *seq)
794{ 856{
795 struct sock *sk; 857 struct sock *sk;
796 struct raw_iter_state* state = raw_seq_private(seq); 858 struct raw_iter_state* state = raw_seq_private(seq);
797 859
798 for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) { 860 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
861 ++state->bucket) {
799 struct hlist_node *node; 862 struct hlist_node *node;
800 863
801 sk_for_each(sk, node, &raw_v4_htable[state->bucket]) 864 sk_for_each(sk, node, &state->h->ht[state->bucket])
802 if (sk->sk_family == PF_INET) 865 if (sk->sk_net == state->p.net)
803 goto found; 866 goto found;
804 } 867 }
805 sk = NULL; 868 sk = NULL;
@@ -815,10 +878,10 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
815 sk = sk_next(sk); 878 sk = sk_next(sk);
816try_again: 879try_again:
817 ; 880 ;
818 } while (sk && sk->sk_family != PF_INET); 881 } while (sk && sk->sk_net != state->p.net);
819 882
820 if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { 883 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
821 sk = sk_head(&raw_v4_htable[state->bucket]); 884 sk = sk_head(&state->h->ht[state->bucket]);
822 goto try_again; 885 goto try_again;
823 } 886 }
824 return sk; 887 return sk;
@@ -834,13 +897,16 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
834 return pos ? NULL : sk; 897 return pos ? NULL : sk;
835} 898}
836 899
837static void *raw_seq_start(struct seq_file *seq, loff_t *pos) 900void *raw_seq_start(struct seq_file *seq, loff_t *pos)
838{ 901{
839 read_lock(&raw_v4_lock); 902 struct raw_iter_state *state = raw_seq_private(seq);
903
904 read_lock(&state->h->lock);
840 return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 905 return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
841} 906}
907EXPORT_SYMBOL_GPL(raw_seq_start);
842 908
843static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) 909void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
844{ 910{
845 struct sock *sk; 911 struct sock *sk;
846 912
@@ -851,13 +917,17 @@ static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
851 ++*pos; 917 ++*pos;
852 return sk; 918 return sk;
853} 919}
920EXPORT_SYMBOL_GPL(raw_seq_next);
854 921
855static void raw_seq_stop(struct seq_file *seq, void *v) 922void raw_seq_stop(struct seq_file *seq, void *v)
856{ 923{
857 read_unlock(&raw_v4_lock); 924 struct raw_iter_state *state = raw_seq_private(seq);
925
926 read_unlock(&state->h->lock);
858} 927}
928EXPORT_SYMBOL_GPL(raw_seq_stop);
859 929
860static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) 930static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
861{ 931{
862 struct inet_sock *inet = inet_sk(sp); 932 struct inet_sock *inet = inet_sk(sp);
863 __be32 dest = inet->daddr, 933 __be32 dest = inet->daddr,
@@ -865,31 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
865 __u16 destp = 0, 935 __u16 destp = 0,
866 srcp = inet->num; 936 srcp = inet->num;
867 937
868 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 938 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
869 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", 939 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
870 i, src, srcp, dest, destp, sp->sk_state, 940 i, src, srcp, dest, destp, sp->sk_state,
871 atomic_read(&sp->sk_wmem_alloc), 941 atomic_read(&sp->sk_wmem_alloc),
872 atomic_read(&sp->sk_rmem_alloc), 942 atomic_read(&sp->sk_rmem_alloc),
873 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), 943 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
874 atomic_read(&sp->sk_refcnt), sp); 944 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
875 return tmpbuf;
876} 945}
877 946
878static int raw_seq_show(struct seq_file *seq, void *v) 947static int raw_seq_show(struct seq_file *seq, void *v)
879{ 948{
880 char tmpbuf[129];
881
882 if (v == SEQ_START_TOKEN) 949 if (v == SEQ_START_TOKEN)
883 seq_printf(seq, "%-127s\n", 950 seq_printf(seq, " sl local_address rem_address st tx_queue "
884 " sl local_address rem_address st tx_queue " 951 "rx_queue tr tm->when retrnsmt uid timeout "
885 "rx_queue tr tm->when retrnsmt uid timeout " 952 "inode drops\n");
886 "inode"); 953 else
887 else { 954 raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
888 struct raw_iter_state *state = raw_seq_private(seq);
889
890 seq_printf(seq, "%-127s\n",
891 get_raw_sock(v, tmpbuf, state->bucket));
892 }
893 return 0; 955 return 0;
894} 956}
895 957
@@ -900,29 +962,60 @@ static const struct seq_operations raw_seq_ops = {
900 .show = raw_seq_show, 962 .show = raw_seq_show,
901}; 963};
902 964
903static int raw_seq_open(struct inode *inode, struct file *file) 965int raw_seq_open(struct inode *ino, struct file *file,
966 struct raw_hashinfo *h, const struct seq_operations *ops)
967{
968 int err;
969 struct raw_iter_state *i;
970
971 err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
972 if (err < 0)
973 return err;
974
975 i = raw_seq_private((struct seq_file *)file->private_data);
976 i->h = h;
977 return 0;
978}
979EXPORT_SYMBOL_GPL(raw_seq_open);
980
981static int raw_v4_seq_open(struct inode *inode, struct file *file)
904{ 982{
905 return seq_open_private(file, &raw_seq_ops, 983 return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
906 sizeof(struct raw_iter_state));
907} 984}
908 985
909static const struct file_operations raw_seq_fops = { 986static const struct file_operations raw_seq_fops = {
910 .owner = THIS_MODULE, 987 .owner = THIS_MODULE,
911 .open = raw_seq_open, 988 .open = raw_v4_seq_open,
912 .read = seq_read, 989 .read = seq_read,
913 .llseek = seq_lseek, 990 .llseek = seq_lseek,
914 .release = seq_release_private, 991 .release = seq_release_net,
915}; 992};
916 993
917int __init raw_proc_init(void) 994static __net_init int raw_init_net(struct net *net)
918{ 995{
919 if (!proc_net_fops_create(&init_net, "raw", S_IRUGO, &raw_seq_fops)) 996 if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
920 return -ENOMEM; 997 return -ENOMEM;
998
921 return 0; 999 return 0;
922} 1000}
923 1001
1002static __net_exit void raw_exit_net(struct net *net)
1003{
1004 proc_net_remove(net, "raw");
1005}
1006
1007static __net_initdata struct pernet_operations raw_net_ops = {
1008 .init = raw_init_net,
1009 .exit = raw_exit_net,
1010};
1011
1012int __init raw_proc_init(void)
1013{
1014 return register_pernet_subsys(&raw_net_ops);
1015}
1016
924void __init raw_proc_exit(void) 1017void __init raw_proc_exit(void)
925{ 1018{
926 proc_net_remove(&init_net, "raw"); 1019 unregister_pernet_subsys(&raw_net_ops);
927} 1020}
928#endif /* CONFIG_PROC_FS */ 1021#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 21b12de9e653..8842ecb9be48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -92,6 +92,7 @@
92#include <linux/jhash.h> 92#include <linux/jhash.h>
93#include <linux/rcupdate.h> 93#include <linux/rcupdate.h>
94#include <linux/times.h> 94#include <linux/times.h>
95#include <net/dst.h>
95#include <net/net_namespace.h> 96#include <net/net_namespace.h>
96#include <net/protocol.h> 97#include <net/protocol.h>
97#include <net/ip.h> 98#include <net/ip.h>
@@ -116,8 +117,6 @@
116 117
117#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
118 119
119static int ip_rt_min_delay = 2 * HZ;
120static int ip_rt_max_delay = 10 * HZ;
121static int ip_rt_max_size; 120static int ip_rt_max_size;
122static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 121static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
123static int ip_rt_gc_interval = 60 * HZ; 122static int ip_rt_gc_interval = 60 * HZ;
@@ -132,13 +131,11 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
132static int ip_rt_min_pmtu = 512 + 20 + 20; 131static int ip_rt_min_pmtu = 512 + 20 + 20;
133static int ip_rt_min_advmss = 256; 132static int ip_rt_min_advmss = 256;
134static int ip_rt_secret_interval = 10 * 60 * HZ; 133static int ip_rt_secret_interval = 10 * 60 * HZ;
135static unsigned long rt_deadline;
136 134
137#define RTprint(a...) printk(KERN_DEBUG a) 135#define RTprint(a...) printk(KERN_DEBUG a)
138 136
139static struct timer_list rt_flush_timer; 137static void rt_worker_func(struct work_struct *work);
140static void rt_check_expire(struct work_struct *work); 138static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
141static DECLARE_DELAYED_WORK(expires_work, rt_check_expire);
142static struct timer_list rt_secret_timer; 139static struct timer_list rt_secret_timer;
143 140
144/* 141/*
@@ -152,7 +149,7 @@ static void ipv4_dst_ifdown(struct dst_entry *dst,
152static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 149static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
153static void ipv4_link_failure(struct sk_buff *skb); 150static void ipv4_link_failure(struct sk_buff *skb);
154static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 151static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
155static int rt_garbage_collect(void); 152static int rt_garbage_collect(struct dst_ops *ops);
156 153
157 154
158static struct dst_ops ipv4_dst_ops = { 155static struct dst_ops ipv4_dst_ops = {
@@ -165,7 +162,9 @@ static struct dst_ops ipv4_dst_ops = {
165 .negative_advice = ipv4_negative_advice, 162 .negative_advice = ipv4_negative_advice,
166 .link_failure = ipv4_link_failure, 163 .link_failure = ipv4_link_failure,
167 .update_pmtu = ip_rt_update_pmtu, 164 .update_pmtu = ip_rt_update_pmtu,
165 .local_out = ip_local_out,
168 .entry_size = sizeof(struct rtable), 166 .entry_size = sizeof(struct rtable),
167 .entries = ATOMIC_INIT(0),
169}; 168};
170 169
171#define ECN_OR_COST(class) TC_PRIO_##class 170#define ECN_OR_COST(class) TC_PRIO_##class
@@ -232,34 +231,40 @@ struct rt_hash_bucket {
232 231
233static spinlock_t *rt_hash_locks; 232static spinlock_t *rt_hash_locks;
234# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] 233# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
235# define rt_hash_lock_init() { \ 234
236 int i; \ 235static __init void rt_hash_lock_init(void)
237 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \ 236{
238 if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \ 237 int i;
239 for (i = 0; i < RT_HASH_LOCK_SZ; i++) \ 238
240 spin_lock_init(&rt_hash_locks[i]); \ 239 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
241 } 240 GFP_KERNEL);
241 if (!rt_hash_locks)
242 panic("IP: failed to allocate rt_hash_locks\n");
243
244 for (i = 0; i < RT_HASH_LOCK_SZ; i++)
245 spin_lock_init(&rt_hash_locks[i]);
246}
242#else 247#else
243# define rt_hash_lock_addr(slot) NULL 248# define rt_hash_lock_addr(slot) NULL
244# define rt_hash_lock_init() 249
250static inline void rt_hash_lock_init(void)
251{
252}
245#endif 253#endif
246 254
247static struct rt_hash_bucket *rt_hash_table; 255static struct rt_hash_bucket *rt_hash_table;
248static unsigned rt_hash_mask; 256static unsigned rt_hash_mask;
249static unsigned int rt_hash_log; 257static unsigned int rt_hash_log;
250static unsigned int rt_hash_rnd; 258static atomic_t rt_genid;
251 259
252static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 260static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
253#define RT_CACHE_STAT_INC(field) \ 261#define RT_CACHE_STAT_INC(field) \
254 (__raw_get_cpu_var(rt_cache_stat).field++) 262 (__raw_get_cpu_var(rt_cache_stat).field++)
255 263
256static int rt_intern_hash(unsigned hash, struct rtable *rth,
257 struct rtable **res);
258
259static unsigned int rt_hash_code(u32 daddr, u32 saddr) 264static unsigned int rt_hash_code(u32 daddr, u32 saddr)
260{ 265{
261 return (jhash_2words(daddr, saddr, rt_hash_rnd) 266 return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
262 & rt_hash_mask); 267 & rt_hash_mask;
263} 268}
264 269
265#define rt_hash(daddr, saddr, idx) \ 270#define rt_hash(daddr, saddr, idx) \
@@ -269,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
269#ifdef CONFIG_PROC_FS 274#ifdef CONFIG_PROC_FS
270struct rt_cache_iter_state { 275struct rt_cache_iter_state {
271 int bucket; 276 int bucket;
277 int genid;
272}; 278};
273 279
274static struct rtable *rt_cache_get_first(struct seq_file *seq) 280static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
275{ 281{
276 struct rtable *r = NULL; 282 struct rtable *r = NULL;
277 struct rt_cache_iter_state *st = seq->private;
278 283
279 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 284 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
280 rcu_read_lock_bh(); 285 rcu_read_lock_bh();
281 r = rt_hash_table[st->bucket].chain; 286 r = rcu_dereference(rt_hash_table[st->bucket].chain);
282 if (r) 287 while (r) {
283 break; 288 if (r->rt_genid == st->genid)
289 return r;
290 r = rcu_dereference(r->u.dst.rt_next);
291 }
284 rcu_read_unlock_bh(); 292 rcu_read_unlock_bh();
285 } 293 }
286 return r; 294 return r;
287} 295}
288 296
289static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) 297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
290{ 298{
291 struct rt_cache_iter_state *st = rcu_dereference(seq->private);
292
293 r = r->u.dst.rt_next; 299 r = r->u.dst.rt_next;
294 while (!r) { 300 while (!r) {
295 rcu_read_unlock_bh(); 301 rcu_read_unlock_bh();
@@ -298,32 +304,41 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
298 rcu_read_lock_bh(); 304 rcu_read_lock_bh();
299 r = rt_hash_table[st->bucket].chain; 305 r = rt_hash_table[st->bucket].chain;
300 } 306 }
301 return r; 307 return rcu_dereference(r);
302} 308}
303 309
304static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) 310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
305{ 311{
306 struct rtable *r = rt_cache_get_first(seq); 312 struct rtable *r = rt_cache_get_first(st);
307 313
308 if (r) 314 if (r)
309 while (pos && (r = rt_cache_get_next(seq, r))) 315 while (pos && (r = rt_cache_get_next(st, r))) {
316 if (r->rt_genid != st->genid)
317 continue;
310 --pos; 318 --pos;
319 }
311 return pos ? NULL : r; 320 return pos ? NULL : r;
312} 321}
313 322
314static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 323static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
315{ 324{
316 return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 325 struct rt_cache_iter_state *st = seq->private;
326
327 if (*pos)
328 return rt_cache_get_idx(st, *pos - 1);
329 st->genid = atomic_read(&rt_genid);
330 return SEQ_START_TOKEN;
317} 331}
318 332
319static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 333static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
320{ 334{
321 struct rtable *r = NULL; 335 struct rtable *r;
336 struct rt_cache_iter_state *st = seq->private;
322 337
323 if (v == SEQ_START_TOKEN) 338 if (v == SEQ_START_TOKEN)
324 r = rt_cache_get_first(seq); 339 r = rt_cache_get_first(st);
325 else 340 else
326 r = rt_cache_get_next(seq, v); 341 r = rt_cache_get_next(st, v);
327 ++*pos; 342 ++*pos;
328 return r; 343 return r;
329} 344}
@@ -478,6 +493,83 @@ static const struct file_operations rt_cpu_seq_fops = {
478 .release = seq_release, 493 .release = seq_release,
479}; 494};
480 495
496#ifdef CONFIG_NET_CLS_ROUTE
497static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
498 int length, int *eof, void *data)
499{
500 unsigned int i;
501
502 if ((offset & 3) || (length & 3))
503 return -EIO;
504
505 if (offset >= sizeof(struct ip_rt_acct) * 256) {
506 *eof = 1;
507 return 0;
508 }
509
510 if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
511 length = sizeof(struct ip_rt_acct) * 256 - offset;
512 *eof = 1;
513 }
514
515 offset /= sizeof(u32);
516
517 if (length > 0) {
518 u32 *dst = (u32 *) buffer;
519
520 *start = buffer;
521 memset(dst, 0, length);
522
523 for_each_possible_cpu(i) {
524 unsigned int j;
525 u32 *src;
526
527 src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset;
528 for (j = 0; j < length/4; j++)
529 dst[j] += src[j];
530 }
531 }
532 return length;
533}
534#endif
535
536static __init int ip_rt_proc_init(struct net *net)
537{
538 struct proc_dir_entry *pde;
539
540 pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
541 &rt_cache_seq_fops);
542 if (!pde)
543 goto err1;
544
545 pde = create_proc_entry("rt_cache", S_IRUGO, net->proc_net_stat);
546 if (!pde)
547 goto err2;
548
549 pde->proc_fops = &rt_cpu_seq_fops;
550
551#ifdef CONFIG_NET_CLS_ROUTE
552 pde = create_proc_read_entry("rt_acct", 0, net->proc_net,
553 ip_rt_acct_read, NULL);
554 if (!pde)
555 goto err3;
556#endif
557 return 0;
558
559#ifdef CONFIG_NET_CLS_ROUTE
560err3:
561 remove_proc_entry("rt_cache", net->proc_net_stat);
562#endif
563err2:
564 remove_proc_entry("rt_cache", net->proc_net);
565err1:
566 return -ENOMEM;
567}
568#else
569static inline int ip_rt_proc_init(struct net *net)
570{
571 return 0;
572}
481#endif /* CONFIG_PROC_FS */ 573#endif /* CONFIG_PROC_FS */
482 574
483static __inline__ void rt_free(struct rtable *rt) 575static __inline__ void rt_free(struct rtable *rt)
@@ -559,7 +651,41 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
559 (fl1->iif ^ fl2->iif)) == 0; 651 (fl1->iif ^ fl2->iif)) == 0;
560} 652}
561 653
562static void rt_check_expire(struct work_struct *work) 654static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
655{
656 return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net;
657}
658
659/*
660 * Perform a full scan of hash table and free all entries.
661 * Can be called by a softirq or a process.
662 * In the later case, we want to be reschedule if necessary
663 */
664static void rt_do_flush(int process_context)
665{
666 unsigned int i;
667 struct rtable *rth, *next;
668
669 for (i = 0; i <= rt_hash_mask; i++) {
670 if (process_context && need_resched())
671 cond_resched();
672 rth = rt_hash_table[i].chain;
673 if (!rth)
674 continue;
675
676 spin_lock_bh(rt_hash_lock_addr(i));
677 rth = rt_hash_table[i].chain;
678 rt_hash_table[i].chain = NULL;
679 spin_unlock_bh(rt_hash_lock_addr(i));
680
681 for (; rth; rth = next) {
682 next = rth->u.dst.rt_next;
683 rt_free(rth);
684 }
685 }
686}
687
688static void rt_check_expire(void)
563{ 689{
564 static unsigned int rover; 690 static unsigned int rover;
565 unsigned int i = rover, goal; 691 unsigned int i = rover, goal;
@@ -578,10 +704,18 @@ static void rt_check_expire(struct work_struct *work)
578 i = (i + 1) & rt_hash_mask; 704 i = (i + 1) & rt_hash_mask;
579 rthp = &rt_hash_table[i].chain; 705 rthp = &rt_hash_table[i].chain;
580 706
707 if (need_resched())
708 cond_resched();
709
581 if (*rthp == NULL) 710 if (*rthp == NULL)
582 continue; 711 continue;
583 spin_lock_bh(rt_hash_lock_addr(i)); 712 spin_lock_bh(rt_hash_lock_addr(i));
584 while ((rth = *rthp) != NULL) { 713 while ((rth = *rthp) != NULL) {
714 if (rth->rt_genid != atomic_read(&rt_genid)) {
715 *rthp = rth->u.dst.rt_next;
716 rt_free(rth);
717 continue;
718 }
585 if (rth->u.dst.expires) { 719 if (rth->u.dst.expires) {
586 /* Entry is expired even if it is in use */ 720 /* Entry is expired even if it is in use */
587 if (time_before_eq(jiffies, rth->u.dst.expires)) { 721 if (time_before_eq(jiffies, rth->u.dst.expires)) {
@@ -602,83 +736,50 @@ static void rt_check_expire(struct work_struct *work)
602 spin_unlock_bh(rt_hash_lock_addr(i)); 736 spin_unlock_bh(rt_hash_lock_addr(i));
603 } 737 }
604 rover = i; 738 rover = i;
605 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
606} 739}
607 740
608/* This can run from both BH and non-BH contexts, the latter 741/*
609 * in the case of a forced flush event. 742 * rt_worker_func() is run in process context.
743 * we call rt_check_expire() to scan part of the hash table
610 */ 744 */
611static void rt_run_flush(unsigned long dummy) 745static void rt_worker_func(struct work_struct *work)
612{ 746{
613 int i; 747 rt_check_expire();
614 struct rtable *rth, *next; 748 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
615 749}
616 rt_deadline = 0;
617
618 get_random_bytes(&rt_hash_rnd, 4);
619 750
620 for (i = rt_hash_mask; i >= 0; i--) { 751/*
621 spin_lock_bh(rt_hash_lock_addr(i)); 752 * Pertubation of rt_genid by a small quantity [1..256]
622 rth = rt_hash_table[i].chain; 753 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
623 if (rth) 754 * many times (2^24) without giving recent rt_genid.
624 rt_hash_table[i].chain = NULL; 755 * Jenkins hash is strong enough that litle changes of rt_genid are OK.
625 spin_unlock_bh(rt_hash_lock_addr(i)); 756 */
757static void rt_cache_invalidate(void)
758{
759 unsigned char shuffle;
626 760
627 for (; rth; rth = next) { 761 get_random_bytes(&shuffle, sizeof(shuffle));
628 next = rth->u.dst.rt_next; 762 atomic_add(shuffle + 1U, &rt_genid);
629 rt_free(rth);
630 }
631 }
632} 763}
633 764
634static DEFINE_SPINLOCK(rt_flush_lock); 765/*
635 766 * delay < 0 : invalidate cache (fast : entries will be deleted later)
767 * delay >= 0 : invalidate & flush cache (can be long)
768 */
636void rt_cache_flush(int delay) 769void rt_cache_flush(int delay)
637{ 770{
638 unsigned long now = jiffies; 771 rt_cache_invalidate();
639 int user_mode = !in_softirq(); 772 if (delay >= 0)
640 773 rt_do_flush(!in_softirq());
641 if (delay < 0)
642 delay = ip_rt_min_delay;
643
644 spin_lock_bh(&rt_flush_lock);
645
646 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
647 long tmo = (long)(rt_deadline - now);
648
649 /* If flush timer is already running
650 and flush request is not immediate (delay > 0):
651
652 if deadline is not achieved, prolongate timer to "delay",
653 otherwise fire it at deadline time.
654 */
655
656 if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
657 tmo = 0;
658
659 if (delay > tmo)
660 delay = tmo;
661 }
662
663 if (delay <= 0) {
664 spin_unlock_bh(&rt_flush_lock);
665 rt_run_flush(0);
666 return;
667 }
668
669 if (rt_deadline == 0)
670 rt_deadline = now + ip_rt_max_delay;
671
672 mod_timer(&rt_flush_timer, now+delay);
673 spin_unlock_bh(&rt_flush_lock);
674} 774}
675 775
776/*
777 * We change rt_genid and let gc do the cleanup
778 */
676static void rt_secret_rebuild(unsigned long dummy) 779static void rt_secret_rebuild(unsigned long dummy)
677{ 780{
678 unsigned long now = jiffies; 781 rt_cache_invalidate();
679 782 mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
680 rt_cache_flush(0);
681 mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
682} 783}
683 784
684/* 785/*
@@ -694,7 +795,7 @@ static void rt_secret_rebuild(unsigned long dummy)
694 and when load increases it reduces to limit cache size. 795 and when load increases it reduces to limit cache size.
695 */ 796 */
696 797
697static int rt_garbage_collect(void) 798static int rt_garbage_collect(struct dst_ops *ops)
698{ 799{
699 static unsigned long expire = RT_GC_TIMEOUT; 800 static unsigned long expire = RT_GC_TIMEOUT;
700 static unsigned long last_gc; 801 static unsigned long last_gc;
@@ -725,14 +826,14 @@ static int rt_garbage_collect(void)
725 equilibrium = ipv4_dst_ops.gc_thresh; 826 equilibrium = ipv4_dst_ops.gc_thresh;
726 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 827 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
727 if (goal > 0) { 828 if (goal > 0) {
728 equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1); 829 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
729 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 830 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
730 } 831 }
731 } else { 832 } else {
732 /* We are in dangerous area. Try to reduce cache really 833 /* We are in dangerous area. Try to reduce cache really
733 * aggressively. 834 * aggressively.
734 */ 835 */
735 goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1); 836 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
736 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; 837 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
737 } 838 }
738 839
@@ -754,7 +855,8 @@ static int rt_garbage_collect(void)
754 rthp = &rt_hash_table[k].chain; 855 rthp = &rt_hash_table[k].chain;
755 spin_lock_bh(rt_hash_lock_addr(k)); 856 spin_lock_bh(rt_hash_lock_addr(k));
756 while ((rth = *rthp) != NULL) { 857 while ((rth = *rthp) != NULL) {
757 if (!rt_may_expire(rth, tmo, expire)) { 858 if (rth->rt_genid == atomic_read(&rt_genid) &&
859 !rt_may_expire(rth, tmo, expire)) {
758 tmo >>= 1; 860 tmo >>= 1;
759 rthp = &rth->u.dst.rt_next; 861 rthp = &rth->u.dst.rt_next;
760 continue; 862 continue;
@@ -835,7 +937,12 @@ restart:
835 937
836 spin_lock_bh(rt_hash_lock_addr(hash)); 938 spin_lock_bh(rt_hash_lock_addr(hash));
837 while ((rth = *rthp) != NULL) { 939 while ((rth = *rthp) != NULL) {
838 if (compare_keys(&rth->fl, &rt->fl)) { 940 if (rth->rt_genid != atomic_read(&rt_genid)) {
941 *rthp = rth->u.dst.rt_next;
942 rt_free(rth);
943 continue;
944 }
945 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
839 /* Put it first */ 946 /* Put it first */
840 *rthp = rth->u.dst.rt_next; 947 *rthp = rth->u.dst.rt_next;
841 /* 948 /*
@@ -851,9 +958,7 @@ restart:
851 */ 958 */
852 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 959 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
853 960
854 rth->u.dst.__use++; 961 dst_use(&rth->u.dst, now);
855 dst_hold(&rth->u.dst);
856 rth->u.dst.lastuse = now;
857 spin_unlock_bh(rt_hash_lock_addr(hash)); 962 spin_unlock_bh(rt_hash_lock_addr(hash));
858 963
859 rt_drop(rt); 964 rt_drop(rt);
@@ -911,7 +1016,7 @@ restart:
911 int saved_int = ip_rt_gc_min_interval; 1016 int saved_int = ip_rt_gc_min_interval;
912 ip_rt_gc_elasticity = 1; 1017 ip_rt_gc_elasticity = 1;
913 ip_rt_gc_min_interval = 0; 1018 ip_rt_gc_min_interval = 0;
914 rt_garbage_collect(); 1019 rt_garbage_collect(&ipv4_dst_ops);
915 ip_rt_gc_min_interval = saved_int; 1020 ip_rt_gc_min_interval = saved_int;
916 ip_rt_gc_elasticity = saved_elasticity; 1021 ip_rt_gc_elasticity = saved_elasticity;
917 goto restart; 1022 goto restart;
@@ -1002,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1002 1107
1003static void rt_del(unsigned hash, struct rtable *rt) 1108static void rt_del(unsigned hash, struct rtable *rt)
1004{ 1109{
1005 struct rtable **rthp; 1110 struct rtable **rthp, *aux;
1006 1111
1112 rthp = &rt_hash_table[hash].chain;
1007 spin_lock_bh(rt_hash_lock_addr(hash)); 1113 spin_lock_bh(rt_hash_lock_addr(hash));
1008 ip_rt_put(rt); 1114 ip_rt_put(rt);
1009 for (rthp = &rt_hash_table[hash].chain; *rthp; 1115 while ((aux = *rthp) != NULL) {
1010 rthp = &(*rthp)->u.dst.rt_next) 1116 if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
1011 if (*rthp == rt) { 1117 *rthp = aux->u.dst.rt_next;
1012 *rthp = rt->u.dst.rt_next; 1118 rt_free(aux);
1013 rt_free(rt); 1119 continue;
1014 break;
1015 } 1120 }
1121 rthp = &aux->u.dst.rt_next;
1122 }
1016 spin_unlock_bh(rt_hash_lock_addr(hash)); 1123 spin_unlock_bh(rt_hash_lock_addr(hash));
1017} 1124}
1018 1125
@@ -1030,7 +1137,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1030 return; 1137 return;
1031 1138
1032 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1139 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1033 || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw)) 1140 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1141 || ipv4_is_zeronet(new_gw))
1034 goto reject_redirect; 1142 goto reject_redirect;
1035 1143
1036 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1144 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
@@ -1039,7 +1147,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1039 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1147 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1040 goto reject_redirect; 1148 goto reject_redirect;
1041 } else { 1149 } else {
1042 if (inet_addr_type(new_gw) != RTN_UNICAST) 1150 if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST)
1043 goto reject_redirect; 1151 goto reject_redirect;
1044 } 1152 }
1045 1153
@@ -1056,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1056 if (rth->fl.fl4_dst != daddr || 1164 if (rth->fl.fl4_dst != daddr ||
1057 rth->fl.fl4_src != skeys[i] || 1165 rth->fl.fl4_src != skeys[i] ||
1058 rth->fl.oif != ikeys[k] || 1166 rth->fl.oif != ikeys[k] ||
1059 rth->fl.iif != 0) { 1167 rth->fl.iif != 0 ||
1168 rth->rt_genid != atomic_read(&rt_genid)) {
1060 rthp = &rth->u.dst.rt_next; 1169 rthp = &rth->u.dst.rt_next;
1061 continue; 1170 continue;
1062 } 1171 }
@@ -1094,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1094 rt->u.dst.neighbour = NULL; 1203 rt->u.dst.neighbour = NULL;
1095 rt->u.dst.hh = NULL; 1204 rt->u.dst.hh = NULL;
1096 rt->u.dst.xfrm = NULL; 1205 rt->u.dst.xfrm = NULL;
1097 1206 rt->rt_genid = atomic_read(&rt_genid);
1098 rt->rt_flags |= RTCF_REDIRECTED; 1207 rt->rt_flags |= RTCF_REDIRECTED;
1099 1208
1100 /* Gateway is different ... */ 1209 /* Gateway is different ... */
@@ -1160,7 +1269,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1160 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1269 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1161 rt->fl.oif); 1270 rt->fl.oif);
1162#if RT_CACHE_DEBUG >= 1 1271#if RT_CACHE_DEBUG >= 1
1163 printk(KERN_DEBUG "ip_rt_advice: redirect to " 1272 printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1164 "%u.%u.%u.%u/%02x dropped\n", 1273 "%u.%u.%u.%u/%02x dropped\n",
1165 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); 1274 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1166#endif 1275#endif
@@ -1251,6 +1360,7 @@ static int ip_error(struct sk_buff *skb)
1251 break; 1360 break;
1252 case ENETUNREACH: 1361 case ENETUNREACH:
1253 code = ICMP_NET_UNREACH; 1362 code = ICMP_NET_UNREACH;
1363 IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
1254 break; 1364 break;
1255 case EACCES: 1365 case EACCES:
1256 code = ICMP_PKT_FILTERED; 1366 code = ICMP_PKT_FILTERED;
@@ -1289,7 +1399,8 @@ static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
1289 return 68; 1399 return 68;
1290} 1400}
1291 1401
1292unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) 1402unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1403 unsigned short new_mtu)
1293{ 1404{
1294 int i; 1405 int i;
1295 unsigned short old_mtu = ntohs(iph->tot_len); 1406 unsigned short old_mtu = ntohs(iph->tot_len);
@@ -1312,7 +1423,9 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
1312 rth->rt_dst == daddr && 1423 rth->rt_dst == daddr &&
1313 rth->rt_src == iph->saddr && 1424 rth->rt_src == iph->saddr &&
1314 rth->fl.iif == 0 && 1425 rth->fl.iif == 0 &&
1315 !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { 1426 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
1427 rth->u.dst.dev->nd_net == net &&
1428 rth->rt_genid == atomic_read(&rt_genid)) {
1316 unsigned short mtu = new_mtu; 1429 unsigned short mtu = new_mtu;
1317 1430
1318 if (new_mtu < 68 || new_mtu >= old_mtu) { 1431 if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1387,8 +1500,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1387{ 1500{
1388 struct rtable *rt = (struct rtable *) dst; 1501 struct rtable *rt = (struct rtable *) dst;
1389 struct in_device *idev = rt->idev; 1502 struct in_device *idev = rt->idev;
1390 if (dev != init_net.loopback_dev && idev && idev->dev == dev) { 1503 if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) {
1391 struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); 1504 struct in_device *loopback_idev =
1505 in_dev_get(dev->nd_net->loopback_dev);
1392 if (loopback_idev) { 1506 if (loopback_idev) {
1393 rt->idev = loopback_idev; 1507 rt->idev = loopback_idev;
1394 in_dev_put(idev); 1508 in_dev_put(idev);
@@ -1432,7 +1546,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1432 1546
1433 if (rt->fl.iif == 0) 1547 if (rt->fl.iif == 0)
1434 src = rt->rt_src; 1548 src = rt->rt_src;
1435 else if (fib_lookup(&rt->fl, &res) == 0) { 1549 else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) {
1436 src = FIB_RES_PREFSRC(res); 1550 src = FIB_RES_PREFSRC(res);
1437 fib_res_put(&res); 1551 fib_res_put(&res);
1438 } else 1552 } else
@@ -1507,12 +1621,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1507 if (in_dev == NULL) 1621 if (in_dev == NULL)
1508 return -EINVAL; 1622 return -EINVAL;
1509 1623
1510 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) || 1624 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1511 skb->protocol != htons(ETH_P_IP)) 1625 ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
1512 goto e_inval; 1626 goto e_inval;
1513 1627
1514 if (ZERONET(saddr)) { 1628 if (ipv4_is_zeronet(saddr)) {
1515 if (!LOCAL_MCAST(daddr)) 1629 if (!ipv4_is_local_multicast(daddr))
1516 goto e_inval; 1630 goto e_inval;
1517 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1631 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1518 } else if (fib_validate_source(saddr, 0, tos, 0, 1632 } else if (fib_validate_source(saddr, 0, tos, 0,
@@ -1546,15 +1660,16 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1546 rth->fl.oif = 0; 1660 rth->fl.oif = 0;
1547 rth->rt_gateway = daddr; 1661 rth->rt_gateway = daddr;
1548 rth->rt_spec_dst= spec_dst; 1662 rth->rt_spec_dst= spec_dst;
1549 rth->rt_type = RTN_MULTICAST; 1663 rth->rt_genid = atomic_read(&rt_genid);
1550 rth->rt_flags = RTCF_MULTICAST; 1664 rth->rt_flags = RTCF_MULTICAST;
1665 rth->rt_type = RTN_MULTICAST;
1551 if (our) { 1666 if (our) {
1552 rth->u.dst.input= ip_local_deliver; 1667 rth->u.dst.input= ip_local_deliver;
1553 rth->rt_flags |= RTCF_LOCAL; 1668 rth->rt_flags |= RTCF_LOCAL;
1554 } 1669 }
1555 1670
1556#ifdef CONFIG_IP_MROUTE 1671#ifdef CONFIG_IP_MROUTE
1557 if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) 1672 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1558 rth->u.dst.input = ip_mr_input; 1673 rth->u.dst.input = ip_mr_input;
1559#endif 1674#endif
1560 RT_CACHE_STAT_INC(in_slow_mc); 1675 RT_CACHE_STAT_INC(in_slow_mc);
@@ -1641,7 +1756,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
1641 if (err) 1756 if (err)
1642 flags |= RTCF_DIRECTSRC; 1757 flags |= RTCF_DIRECTSRC;
1643 1758
1644 if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) && 1759 if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
1645 (IN_DEV_SHARED_MEDIA(out_dev) || 1760 (IN_DEV_SHARED_MEDIA(out_dev) ||
1646 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1761 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1647 flags |= RTCF_DOREDIRECT; 1762 flags |= RTCF_DOREDIRECT;
@@ -1650,7 +1765,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
1650 /* Not IP (i.e. ARP). Do not create route, if it is 1765 /* Not IP (i.e. ARP). Do not create route, if it is
1651 * invalid for proxy arp. DNAT routes are always valid. 1766 * invalid for proxy arp. DNAT routes are always valid.
1652 */ 1767 */
1653 if (out_dev == in_dev && !(flags & RTCF_DNAT)) { 1768 if (out_dev == in_dev) {
1654 err = -EINVAL; 1769 err = -EINVAL;
1655 goto cleanup; 1770 goto cleanup;
1656 } 1771 }
@@ -1686,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
1686 1801
1687 rth->u.dst.input = ip_forward; 1802 rth->u.dst.input = ip_forward;
1688 rth->u.dst.output = ip_output; 1803 rth->u.dst.output = ip_output;
1804 rth->rt_genid = atomic_read(&rt_genid);
1689 1805
1690 rt_set_nexthop(rth, res, itag); 1806 rt_set_nexthop(rth, res, itag);
1691 1807
@@ -1754,6 +1870,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1754 __be32 spec_dst; 1870 __be32 spec_dst;
1755 int err = -EINVAL; 1871 int err = -EINVAL;
1756 int free_res = 0; 1872 int free_res = 0;
1873 struct net * net = dev->nd_net;
1757 1874
1758 /* IP on this device is disabled. */ 1875 /* IP on this device is disabled. */
1759 1876
@@ -1764,7 +1881,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1764 by fib_lookup. 1881 by fib_lookup.
1765 */ 1882 */
1766 1883
1767 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) 1884 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1885 ipv4_is_loopback(saddr))
1768 goto martian_source; 1886 goto martian_source;
1769 1887
1770 if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0)) 1888 if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
@@ -1773,16 +1891,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1773 /* Accept zero addresses only to limited broadcast; 1891 /* Accept zero addresses only to limited broadcast;
1774 * I even do not know to fix it or not. Waiting for complains :-) 1892 * I even do not know to fix it or not. Waiting for complains :-)
1775 */ 1893 */
1776 if (ZERONET(saddr)) 1894 if (ipv4_is_zeronet(saddr))
1777 goto martian_source; 1895 goto martian_source;
1778 1896
1779 if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) 1897 if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
1898 ipv4_is_loopback(daddr))
1780 goto martian_destination; 1899 goto martian_destination;
1781 1900
1782 /* 1901 /*
1783 * Now we are ready to route packet. 1902 * Now we are ready to route packet.
1784 */ 1903 */
1785 if ((err = fib_lookup(&fl, &res)) != 0) { 1904 if ((err = fib_lookup(net, &fl, &res)) != 0) {
1786 if (!IN_DEV_FORWARD(in_dev)) 1905 if (!IN_DEV_FORWARD(in_dev))
1787 goto e_hostunreach; 1906 goto e_hostunreach;
1788 goto no_route; 1907 goto no_route;
@@ -1797,7 +1916,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1797 if (res.type == RTN_LOCAL) { 1916 if (res.type == RTN_LOCAL) {
1798 int result; 1917 int result;
1799 result = fib_validate_source(saddr, daddr, tos, 1918 result = fib_validate_source(saddr, daddr, tos,
1800 init_net.loopback_dev->ifindex, 1919 net->loopback_dev->ifindex,
1801 dev, &spec_dst, &itag); 1920 dev, &spec_dst, &itag);
1802 if (result < 0) 1921 if (result < 0)
1803 goto martian_source; 1922 goto martian_source;
@@ -1813,11 +1932,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1813 goto martian_destination; 1932 goto martian_destination;
1814 1933
1815 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 1934 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1816 if (err == -ENOBUFS)
1817 goto e_nobufs;
1818 if (err == -EINVAL)
1819 goto e_inval;
1820
1821done: 1935done:
1822 in_dev_put(in_dev); 1936 in_dev_put(in_dev);
1823 if (free_res) 1937 if (free_res)
@@ -1828,7 +1942,7 @@ brd_input:
1828 if (skb->protocol != htons(ETH_P_IP)) 1942 if (skb->protocol != htons(ETH_P_IP))
1829 goto e_inval; 1943 goto e_inval;
1830 1944
1831 if (ZERONET(saddr)) 1945 if (ipv4_is_zeronet(saddr))
1832 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1946 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1833 else { 1947 else {
1834 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1948 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
@@ -1848,6 +1962,7 @@ local_input:
1848 goto e_nobufs; 1962 goto e_nobufs;
1849 1963
1850 rth->u.dst.output= ip_rt_bug; 1964 rth->u.dst.output= ip_rt_bug;
1965 rth->rt_genid = atomic_read(&rt_genid);
1851 1966
1852 atomic_set(&rth->u.dst.__refcnt, 1); 1967 atomic_set(&rth->u.dst.__refcnt, 1);
1853 rth->u.dst.flags= DST_HOST; 1968 rth->u.dst.flags= DST_HOST;
@@ -1864,7 +1979,7 @@ local_input:
1864#endif 1979#endif
1865 rth->rt_iif = 1980 rth->rt_iif =
1866 rth->fl.iif = dev->ifindex; 1981 rth->fl.iif = dev->ifindex;
1867 rth->u.dst.dev = init_net.loopback_dev; 1982 rth->u.dst.dev = net->loopback_dev;
1868 dev_hold(rth->u.dst.dev); 1983 dev_hold(rth->u.dst.dev);
1869 rth->idev = in_dev_get(rth->u.dst.dev); 1984 rth->idev = in_dev_get(rth->u.dst.dev);
1870 rth->rt_gateway = daddr; 1985 rth->rt_gateway = daddr;
@@ -1885,6 +2000,8 @@ no_route:
1885 RT_CACHE_STAT_INC(in_no_route); 2000 RT_CACHE_STAT_INC(in_no_route);
1886 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 2001 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1887 res.type = RTN_UNREACHABLE; 2002 res.type = RTN_UNREACHABLE;
2003 if (err == -ESRCH)
2004 err = -ENETUNREACH;
1888 goto local_input; 2005 goto local_input;
1889 2006
1890 /* 2007 /*
@@ -1922,7 +2039,9 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1922 struct rtable * rth; 2039 struct rtable * rth;
1923 unsigned hash; 2040 unsigned hash;
1924 int iif = dev->ifindex; 2041 int iif = dev->ifindex;
2042 struct net *net;
1925 2043
2044 net = skb->dev->nd_net;
1926 tos &= IPTOS_RT_MASK; 2045 tos &= IPTOS_RT_MASK;
1927 hash = rt_hash(daddr, saddr, iif); 2046 hash = rt_hash(daddr, saddr, iif);
1928 2047
@@ -1934,10 +2053,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1934 rth->fl.iif == iif && 2053 rth->fl.iif == iif &&
1935 rth->fl.oif == 0 && 2054 rth->fl.oif == 0 &&
1936 rth->fl.mark == skb->mark && 2055 rth->fl.mark == skb->mark &&
1937 rth->fl.fl4_tos == tos) { 2056 rth->fl.fl4_tos == tos &&
1938 rth->u.dst.lastuse = jiffies; 2057 rth->u.dst.dev->nd_net == net &&
1939 dst_hold(&rth->u.dst); 2058 rth->rt_genid == atomic_read(&rt_genid)) {
1940 rth->u.dst.__use++; 2059 dst_use(&rth->u.dst, jiffies);
1941 RT_CACHE_STAT_INC(in_hit); 2060 RT_CACHE_STAT_INC(in_hit);
1942 rcu_read_unlock(); 2061 rcu_read_unlock();
1943 skb->dst = (struct dst_entry*)rth; 2062 skb->dst = (struct dst_entry*)rth;
@@ -1958,7 +2077,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1958 Note, that multicast routers are not affected, because 2077 Note, that multicast routers are not affected, because
1959 route cache entry is created eventually. 2078 route cache entry is created eventually.
1960 */ 2079 */
1961 if (MULTICAST(daddr)) { 2080 if (ipv4_is_multicast(daddr)) {
1962 struct in_device *in_dev; 2081 struct in_device *in_dev;
1963 2082
1964 rcu_read_lock(); 2083 rcu_read_lock();
@@ -1967,7 +2086,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1967 ip_hdr(skb)->protocol); 2086 ip_hdr(skb)->protocol);
1968 if (our 2087 if (our
1969#ifdef CONFIG_IP_MROUTE 2088#ifdef CONFIG_IP_MROUTE
1970 || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) 2089 || (!ipv4_is_local_multicast(daddr) &&
2090 IN_DEV_MFORWARD(in_dev))
1971#endif 2091#endif
1972 ) { 2092 ) {
1973 rcu_read_unlock(); 2093 rcu_read_unlock();
@@ -1993,14 +2113,14 @@ static inline int __mkroute_output(struct rtable **result,
1993 u32 tos = RT_FL_TOS(oldflp); 2113 u32 tos = RT_FL_TOS(oldflp);
1994 int err = 0; 2114 int err = 0;
1995 2115
1996 if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) 2116 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
1997 return -EINVAL; 2117 return -EINVAL;
1998 2118
1999 if (fl->fl4_dst == htonl(0xFFFFFFFF)) 2119 if (fl->fl4_dst == htonl(0xFFFFFFFF))
2000 res->type = RTN_BROADCAST; 2120 res->type = RTN_BROADCAST;
2001 else if (MULTICAST(fl->fl4_dst)) 2121 else if (ipv4_is_multicast(fl->fl4_dst))
2002 res->type = RTN_MULTICAST; 2122 res->type = RTN_MULTICAST;
2003 else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst)) 2123 else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
2004 return -EINVAL; 2124 return -EINVAL;
2005 2125
2006 if (dev_out->flags & IFF_LOOPBACK) 2126 if (dev_out->flags & IFF_LOOPBACK)
@@ -2063,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result,
2063 rth->rt_spec_dst= fl->fl4_src; 2183 rth->rt_spec_dst= fl->fl4_src;
2064 2184
2065 rth->u.dst.output=ip_output; 2185 rth->u.dst.output=ip_output;
2186 rth->rt_genid = atomic_read(&rt_genid);
2066 2187
2067 RT_CACHE_STAT_INC(out_slow_tot); 2188 RT_CACHE_STAT_INC(out_slow_tot);
2068 2189
@@ -2080,7 +2201,7 @@ static inline int __mkroute_output(struct rtable **result,
2080#ifdef CONFIG_IP_MROUTE 2201#ifdef CONFIG_IP_MROUTE
2081 if (res->type == RTN_MULTICAST) { 2202 if (res->type == RTN_MULTICAST) {
2082 if (IN_DEV_MFORWARD(in_dev) && 2203 if (IN_DEV_MFORWARD(in_dev) &&
2083 !LOCAL_MCAST(oldflp->fl4_dst)) { 2204 !ipv4_is_local_multicast(oldflp->fl4_dst)) {
2084 rth->u.dst.input = ip_mr_input; 2205 rth->u.dst.input = ip_mr_input;
2085 rth->u.dst.output = ip_mc_output; 2206 rth->u.dst.output = ip_mc_output;
2086 } 2207 }
@@ -2122,7 +2243,8 @@ static inline int ip_mkroute_output(struct rtable **rp,
2122 * Major route resolver routine. 2243 * Major route resolver routine.
2123 */ 2244 */
2124 2245
2125static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) 2246static int ip_route_output_slow(struct net *net, struct rtable **rp,
2247 const struct flowi *oldflp)
2126{ 2248{
2127 u32 tos = RT_FL_TOS(oldflp); 2249 u32 tos = RT_FL_TOS(oldflp);
2128 struct flowi fl = { .nl_u = { .ip4_u = 2250 struct flowi fl = { .nl_u = { .ip4_u =
@@ -2134,7 +2256,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2134 RT_SCOPE_UNIVERSE), 2256 RT_SCOPE_UNIVERSE),
2135 } }, 2257 } },
2136 .mark = oldflp->mark, 2258 .mark = oldflp->mark,
2137 .iif = init_net.loopback_dev->ifindex, 2259 .iif = net->loopback_dev->ifindex,
2138 .oif = oldflp->oif }; 2260 .oif = oldflp->oif };
2139 struct fib_result res; 2261 struct fib_result res;
2140 unsigned flags = 0; 2262 unsigned flags = 0;
@@ -2150,26 +2272,27 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2150 2272
2151 if (oldflp->fl4_src) { 2273 if (oldflp->fl4_src) {
2152 err = -EINVAL; 2274 err = -EINVAL;
2153 if (MULTICAST(oldflp->fl4_src) || 2275 if (ipv4_is_multicast(oldflp->fl4_src) ||
2154 BADCLASS(oldflp->fl4_src) || 2276 ipv4_is_lbcast(oldflp->fl4_src) ||
2155 ZERONET(oldflp->fl4_src)) 2277 ipv4_is_zeronet(oldflp->fl4_src))
2156 goto out; 2278 goto out;
2157 2279
2158 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2280 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2159 dev_out = ip_dev_find(oldflp->fl4_src); 2281 dev_out = ip_dev_find(net, oldflp->fl4_src);
2160 if (dev_out == NULL) 2282 if (dev_out == NULL)
2161 goto out; 2283 goto out;
2162 2284
2163 /* I removed check for oif == dev_out->oif here. 2285 /* I removed check for oif == dev_out->oif here.
2164 It was wrong for two reasons: 2286 It was wrong for two reasons:
2165 1. ip_dev_find(saddr) can return wrong iface, if saddr is 2287 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2166 assigned to multiple interfaces. 2288 is assigned to multiple interfaces.
2167 2. Moreover, we are allowed to send packets with saddr 2289 2. Moreover, we are allowed to send packets with saddr
2168 of another iface. --ANK 2290 of another iface. --ANK
2169 */ 2291 */
2170 2292
2171 if (oldflp->oif == 0 2293 if (oldflp->oif == 0
2172 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { 2294 && (ipv4_is_multicast(oldflp->fl4_dst) ||
2295 oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
2173 /* Special hack: user can direct multicasts 2296 /* Special hack: user can direct multicasts
2174 and limited broadcast via necessary interface 2297 and limited broadcast via necessary interface
2175 without fiddling with IP_MULTICAST_IF or IP_PKTINFO. 2298 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2195,7 +2318,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2195 2318
2196 2319
2197 if (oldflp->oif) { 2320 if (oldflp->oif) {
2198 dev_out = dev_get_by_index(&init_net, oldflp->oif); 2321 dev_out = dev_get_by_index(net, oldflp->oif);
2199 err = -ENODEV; 2322 err = -ENODEV;
2200 if (dev_out == NULL) 2323 if (dev_out == NULL)
2201 goto out; 2324 goto out;
@@ -2206,14 +2329,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2206 goto out; /* Wrong error code */ 2329 goto out; /* Wrong error code */
2207 } 2330 }
2208 2331
2209 if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) { 2332 if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
2333 oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
2210 if (!fl.fl4_src) 2334 if (!fl.fl4_src)
2211 fl.fl4_src = inet_select_addr(dev_out, 0, 2335 fl.fl4_src = inet_select_addr(dev_out, 0,
2212 RT_SCOPE_LINK); 2336 RT_SCOPE_LINK);
2213 goto make_route; 2337 goto make_route;
2214 } 2338 }
2215 if (!fl.fl4_src) { 2339 if (!fl.fl4_src) {
2216 if (MULTICAST(oldflp->fl4_dst)) 2340 if (ipv4_is_multicast(oldflp->fl4_dst))
2217 fl.fl4_src = inet_select_addr(dev_out, 0, 2341 fl.fl4_src = inet_select_addr(dev_out, 0,
2218 fl.fl4_scope); 2342 fl.fl4_scope);
2219 else if (!oldflp->fl4_dst) 2343 else if (!oldflp->fl4_dst)
@@ -2228,15 +2352,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2228 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2352 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2229 if (dev_out) 2353 if (dev_out)
2230 dev_put(dev_out); 2354 dev_put(dev_out);
2231 dev_out = init_net.loopback_dev; 2355 dev_out = net->loopback_dev;
2232 dev_hold(dev_out); 2356 dev_hold(dev_out);
2233 fl.oif = init_net.loopback_dev->ifindex; 2357 fl.oif = net->loopback_dev->ifindex;
2234 res.type = RTN_LOCAL; 2358 res.type = RTN_LOCAL;
2235 flags |= RTCF_LOCAL; 2359 flags |= RTCF_LOCAL;
2236 goto make_route; 2360 goto make_route;
2237 } 2361 }
2238 2362
2239 if (fib_lookup(&fl, &res)) { 2363 if (fib_lookup(net, &fl, &res)) {
2240 res.fi = NULL; 2364 res.fi = NULL;
2241 if (oldflp->oif) { 2365 if (oldflp->oif) {
2242 /* Apparently, routing tables are wrong. Assume, 2366 /* Apparently, routing tables are wrong. Assume,
@@ -2275,7 +2399,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2275 fl.fl4_src = fl.fl4_dst; 2399 fl.fl4_src = fl.fl4_dst;
2276 if (dev_out) 2400 if (dev_out)
2277 dev_put(dev_out); 2401 dev_put(dev_out);
2278 dev_out = init_net.loopback_dev; 2402 dev_out = net->loopback_dev;
2279 dev_hold(dev_out); 2403 dev_hold(dev_out);
2280 fl.oif = dev_out->ifindex; 2404 fl.oif = dev_out->ifindex;
2281 if (res.fi) 2405 if (res.fi)
@@ -2291,7 +2415,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2291 else 2415 else
2292#endif 2416#endif
2293 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) 2417 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
2294 fib_select_default(&fl, &res); 2418 fib_select_default(net, &fl, &res);
2295 2419
2296 if (!fl.fl4_src) 2420 if (!fl.fl4_src)
2297 fl.fl4_src = FIB_RES_PREFSRC(res); 2421 fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2314,7 +2438,8 @@ make_route:
2314out: return err; 2438out: return err;
2315} 2439}
2316 2440
2317int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) 2441int __ip_route_output_key(struct net *net, struct rtable **rp,
2442 const struct flowi *flp)
2318{ 2443{
2319 unsigned hash; 2444 unsigned hash;
2320 struct rtable *rth; 2445 struct rtable *rth;
@@ -2330,10 +2455,10 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2330 rth->fl.oif == flp->oif && 2455 rth->fl.oif == flp->oif &&
2331 rth->fl.mark == flp->mark && 2456 rth->fl.mark == flp->mark &&
2332 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2457 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2333 (IPTOS_RT_MASK | RTO_ONLINK))) { 2458 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2334 rth->u.dst.lastuse = jiffies; 2459 rth->u.dst.dev->nd_net == net &&
2335 dst_hold(&rth->u.dst); 2460 rth->rt_genid == atomic_read(&rt_genid)) {
2336 rth->u.dst.__use++; 2461 dst_use(&rth->u.dst, jiffies);
2337 RT_CACHE_STAT_INC(out_hit); 2462 RT_CACHE_STAT_INC(out_hit);
2338 rcu_read_unlock_bh(); 2463 rcu_read_unlock_bh();
2339 *rp = rth; 2464 *rp = rth;
@@ -2343,7 +2468,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2343 } 2468 }
2344 rcu_read_unlock_bh(); 2469 rcu_read_unlock_bh();
2345 2470
2346 return ip_route_output_slow(rp, flp); 2471 return ip_route_output_slow(net, rp, flp);
2347} 2472}
2348 2473
2349EXPORT_SYMBOL_GPL(__ip_route_output_key); 2474EXPORT_SYMBOL_GPL(__ip_route_output_key);
@@ -2359,15 +2484,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2359 .check = ipv4_dst_check, 2484 .check = ipv4_dst_check,
2360 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2485 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2361 .entry_size = sizeof(struct rtable), 2486 .entry_size = sizeof(struct rtable),
2487 .entries = ATOMIC_INIT(0),
2362}; 2488};
2363 2489
2364 2490
2365static int ipv4_blackhole_output(struct sk_buff *skb)
2366{
2367 kfree_skb(skb);
2368 return 0;
2369}
2370
2371static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) 2491static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
2372{ 2492{
2373 struct rtable *ort = *rp; 2493 struct rtable *ort = *rp;
@@ -2379,8 +2499,8 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
2379 2499
2380 atomic_set(&new->__refcnt, 1); 2500 atomic_set(&new->__refcnt, 1);
2381 new->__use = 1; 2501 new->__use = 1;
2382 new->input = ipv4_blackhole_output; 2502 new->input = dst_discard;
2383 new->output = ipv4_blackhole_output; 2503 new->output = dst_discard;
2384 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 2504 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
2385 2505
2386 new->dev = ort->u.dst.dev; 2506 new->dev = ort->u.dst.dev;
@@ -2392,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
2392 rt->idev = ort->idev; 2512 rt->idev = ort->idev;
2393 if (rt->idev) 2513 if (rt->idev)
2394 in_dev_hold(rt->idev); 2514 in_dev_hold(rt->idev);
2515 rt->rt_genid = atomic_read(&rt_genid);
2395 rt->rt_flags = ort->rt_flags; 2516 rt->rt_flags = ort->rt_flags;
2396 rt->rt_type = ort->rt_type; 2517 rt->rt_type = ort->rt_type;
2397 rt->rt_dst = ort->rt_dst; 2518 rt->rt_dst = ort->rt_dst;
@@ -2411,11 +2532,12 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
2411 return (rt ? 0 : -ENOMEM); 2532 return (rt ? 0 : -ENOMEM);
2412} 2533}
2413 2534
2414int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) 2535int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2536 struct sock *sk, int flags)
2415{ 2537{
2416 int err; 2538 int err;
2417 2539
2418 if ((err = __ip_route_output_key(rp, flp)) != 0) 2540 if ((err = __ip_route_output_key(net, rp, flp)) != 0)
2419 return err; 2541 return err;
2420 2542
2421 if (flp->proto) { 2543 if (flp->proto) {
@@ -2423,7 +2545,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
2423 flp->fl4_src = (*rp)->rt_src; 2545 flp->fl4_src = (*rp)->rt_src;
2424 if (!flp->fl4_dst) 2546 if (!flp->fl4_dst)
2425 flp->fl4_dst = (*rp)->rt_dst; 2547 flp->fl4_dst = (*rp)->rt_dst;
2426 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); 2548 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2549 flags ? XFRM_LOOKUP_WAIT : 0);
2427 if (err == -EREMOTE) 2550 if (err == -EREMOTE)
2428 err = ipv4_dst_blackhole(rp, flp, sk); 2551 err = ipv4_dst_blackhole(rp, flp, sk);
2429 2552
@@ -2435,9 +2558,9 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
2435 2558
2436EXPORT_SYMBOL_GPL(ip_route_output_flow); 2559EXPORT_SYMBOL_GPL(ip_route_output_flow);
2437 2560
2438int ip_route_output_key(struct rtable **rp, struct flowi *flp) 2561int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2439{ 2562{
2440 return ip_route_output_flow(rp, flp, NULL, 0); 2563 return ip_route_output_flow(net, rp, flp, NULL, 0);
2441} 2564}
2442 2565
2443static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2566static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2504,8 +2627,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2504#ifdef CONFIG_IP_MROUTE 2627#ifdef CONFIG_IP_MROUTE
2505 __be32 dst = rt->rt_dst; 2628 __be32 dst = rt->rt_dst;
2506 2629
2507 if (MULTICAST(dst) && !LOCAL_MCAST(dst) && 2630 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2508 IPV4_DEVCONF_ALL(MC_FORWARDING)) { 2631 IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
2509 int err = ipmr_get_route(skb, r, nowait); 2632 int err = ipmr_get_route(skb, r, nowait);
2510 if (err <= 0) { 2633 if (err <= 0) {
2511 if (!nowait) { 2634 if (!nowait) {
@@ -2536,6 +2659,7 @@ nla_put_failure:
2536 2659
2537static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2660static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538{ 2661{
2662 struct net *net = in_skb->sk->sk_net;
2539 struct rtmsg *rtm; 2663 struct rtmsg *rtm;
2540 struct nlattr *tb[RTA_MAX+1]; 2664 struct nlattr *tb[RTA_MAX+1];
2541 struct rtable *rt = NULL; 2665 struct rtable *rt = NULL;
@@ -2545,6 +2669,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2545 int err; 2669 int err;
2546 struct sk_buff *skb; 2670 struct sk_buff *skb;
2547 2671
2672 if (net != &init_net)
2673 return -EINVAL;
2674
2548 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2675 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2549 if (err < 0) 2676 if (err < 0)
2550 goto errout; 2677 goto errout;
@@ -2600,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2600 }, 2727 },
2601 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2728 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2602 }; 2729 };
2603 err = ip_route_output_key(&rt, &fl); 2730 err = ip_route_output_key(&init_net, &rt, &fl);
2604 } 2731 }
2605 2732
2606 if (err) 2733 if (err)
@@ -2615,7 +2742,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2615 if (err <= 0) 2742 if (err <= 0)
2616 goto errout_free; 2743 goto errout_free;
2617 2744
2618 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 2745 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
2619errout: 2746errout:
2620 return err; 2747 return err;
2621 2748
@@ -2631,16 +2758,17 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2631 int idx, s_idx; 2758 int idx, s_idx;
2632 2759
2633 s_h = cb->args[0]; 2760 s_h = cb->args[0];
2761 if (s_h < 0)
2762 s_h = 0;
2634 s_idx = idx = cb->args[1]; 2763 s_idx = idx = cb->args[1];
2635 for (h = 0; h <= rt_hash_mask; h++) { 2764 for (h = s_h; h <= rt_hash_mask; h++) {
2636 if (h < s_h) continue;
2637 if (h > s_h)
2638 s_idx = 0;
2639 rcu_read_lock_bh(); 2765 rcu_read_lock_bh();
2640 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2766 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2641 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2767 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2642 if (idx < s_idx) 2768 if (idx < s_idx)
2643 continue; 2769 continue;
2770 if (rt->rt_genid != atomic_read(&rt_genid))
2771 continue;
2644 skb->dst = dst_clone(&rt->u.dst); 2772 skb->dst = dst_clone(&rt->u.dst);
2645 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2773 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2646 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2774 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
@@ -2652,6 +2780,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2652 dst_release(xchg(&skb->dst, NULL)); 2780 dst_release(xchg(&skb->dst, NULL));
2653 } 2781 }
2654 rcu_read_unlock_bh(); 2782 rcu_read_unlock_bh();
2783 s_idx = 0;
2655 } 2784 }
2656 2785
2657done: 2786done:
@@ -2709,24 +2838,6 @@ ctl_table ipv4_route_table[] = {
2709 .strategy = &ipv4_sysctl_rtcache_flush_strategy, 2838 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
2710 }, 2839 },
2711 { 2840 {
2712 .ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
2713 .procname = "min_delay",
2714 .data = &ip_rt_min_delay,
2715 .maxlen = sizeof(int),
2716 .mode = 0644,
2717 .proc_handler = &proc_dointvec_jiffies,
2718 .strategy = &sysctl_jiffies,
2719 },
2720 {
2721 .ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
2722 .procname = "max_delay",
2723 .data = &ip_rt_max_delay,
2724 .maxlen = sizeof(int),
2725 .mode = 0644,
2726 .proc_handler = &proc_dointvec_jiffies,
2727 .strategy = &sysctl_jiffies,
2728 },
2729 {
2730 .ctl_name = NET_IPV4_ROUTE_GC_THRESH, 2841 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
2731 .procname = "gc_thresh", 2842 .procname = "gc_thresh",
2732 .data = &ipv4_dst_ops.gc_thresh, 2843 .data = &ipv4_dst_ops.gc_thresh,
@@ -2867,55 +2978,7 @@ ctl_table ipv4_route_table[] = {
2867#endif 2978#endif
2868 2979
2869#ifdef CONFIG_NET_CLS_ROUTE 2980#ifdef CONFIG_NET_CLS_ROUTE
2870struct ip_rt_acct *ip_rt_acct; 2981struct ip_rt_acct *ip_rt_acct __read_mostly;
2871
2872/* This code sucks. But you should have seen it before! --RR */
2873
2874/* IP route accounting ptr for this logical cpu number. */
2875#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
2876
2877#ifdef CONFIG_PROC_FS
2878static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
2879 int length, int *eof, void *data)
2880{
2881 unsigned int i;
2882
2883 if ((offset & 3) || (length & 3))
2884 return -EIO;
2885
2886 if (offset >= sizeof(struct ip_rt_acct) * 256) {
2887 *eof = 1;
2888 return 0;
2889 }
2890
2891 if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
2892 length = sizeof(struct ip_rt_acct) * 256 - offset;
2893 *eof = 1;
2894 }
2895
2896 offset /= sizeof(u32);
2897
2898 if (length > 0) {
2899 u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
2900 u32 *dst = (u32 *) buffer;
2901
2902 /* Copy first cpu. */
2903 *start = buffer;
2904 memcpy(dst, src, length);
2905
2906 /* Add the other cpus in, one int at a time */
2907 for_each_possible_cpu(i) {
2908 unsigned int j;
2909
2910 src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
2911
2912 for (j = 0; j < length/4; j++)
2913 dst[j] += src[j];
2914 }
2915 }
2916 return length;
2917}
2918#endif /* CONFIG_PROC_FS */
2919#endif /* CONFIG_NET_CLS_ROUTE */ 2982#endif /* CONFIG_NET_CLS_ROUTE */
2920 2983
2921static __initdata unsigned long rhash_entries; 2984static __initdata unsigned long rhash_entries;
@@ -2932,20 +2995,13 @@ int __init ip_rt_init(void)
2932{ 2995{
2933 int rc = 0; 2996 int rc = 0;
2934 2997
2935 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ 2998 atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
2936 (jiffies ^ (jiffies >> 7))); 2999 (jiffies ^ (jiffies >> 7))));
2937 3000
2938#ifdef CONFIG_NET_CLS_ROUTE 3001#ifdef CONFIG_NET_CLS_ROUTE
2939 { 3002 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
2940 int order;
2941 for (order = 0;
2942 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
2943 /* NOTHING */;
2944 ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
2945 if (!ip_rt_acct) 3003 if (!ip_rt_acct)
2946 panic("IP: failed to allocate ip_rt_acct\n"); 3004 panic("IP: failed to allocate ip_rt_acct\n");
2947 memset(ip_rt_acct, 0, PAGE_SIZE << order);
2948 }
2949#endif 3005#endif
2950 3006
2951 ipv4_dst_ops.kmem_cachep = 3007 ipv4_dst_ops.kmem_cachep =
@@ -2973,10 +3029,7 @@ int __init ip_rt_init(void)
2973 devinet_init(); 3029 devinet_init();
2974 ip_fib_init(); 3030 ip_fib_init();
2975 3031
2976 init_timer(&rt_flush_timer); 3032 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
2977 rt_flush_timer.function = rt_run_flush;
2978 init_timer(&rt_secret_timer);
2979 rt_secret_timer.function = rt_secret_rebuild;
2980 3033
2981 /* All the timers, started at system startup tend 3034 /* All the timers, started at system startup tend
2982 to synchronize. Perturb it a bit. 3035 to synchronize. Perturb it a bit.
@@ -2988,20 +3041,8 @@ int __init ip_rt_init(void)
2988 ip_rt_secret_interval; 3041 ip_rt_secret_interval;
2989 add_timer(&rt_secret_timer); 3042 add_timer(&rt_secret_timer);
2990 3043
2991#ifdef CONFIG_PROC_FS 3044 if (ip_rt_proc_init(&init_net))
2992 { 3045 printk(KERN_ERR "Unable to create route proc files\n");
2993 struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
2994 if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
2995 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
2996 init_net.proc_net_stat))) {
2997 return -ENOMEM;
2998 }
2999 rtstat_pde->proc_fops = &rt_cpu_seq_fops;
3000 }
3001#ifdef CONFIG_NET_CLS_ROUTE
3002 create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL);
3003#endif
3004#endif
3005#ifdef CONFIG_XFRM 3046#ifdef CONFIG_XFRM
3006 xfrm_init(); 3047 xfrm_init();
3007 xfrm4_init(); 3048 xfrm4_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2da1be0589a9..f470fe4511db 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -264,7 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
264 { .sport = th->dest, 264 { .sport = th->dest,
265 .dport = th->source } } }; 265 .dport = th->source } } };
266 security_req_classify_flow(req, &fl); 266 security_req_classify_flow(req, &fl);
267 if (ip_route_output_key(&rt, &fl)) { 267 if (ip_route_output_key(&init_net, &rt, &fl)) {
268 reqsk_free(req); 268 reqsk_free(req);
269 goto out; 269 goto out;
270 } 270 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ffddd2b45352..88286f35d1e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,83 +13,20 @@
13#include <linux/igmp.h> 13#include <linux/igmp.h>
14#include <linux/inetdevice.h> 14#include <linux/inetdevice.h>
15#include <linux/seqlock.h> 15#include <linux/seqlock.h>
16#include <linux/init.h>
16#include <net/snmp.h> 17#include <net/snmp.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/ip.h> 19#include <net/ip.h>
19#include <net/route.h> 20#include <net/route.h>
20#include <net/tcp.h> 21#include <net/tcp.h>
22#include <net/udp.h>
21#include <net/cipso_ipv4.h> 23#include <net/cipso_ipv4.h>
22#include <net/inet_frag.h> 24#include <net/inet_frag.h>
23 25
24/* From af_inet.c */
25extern int sysctl_ip_nonlocal_bind;
26
27#ifdef CONFIG_SYSCTL
28static int zero; 26static int zero;
29static int tcp_retr1_max = 255; 27static int tcp_retr1_max = 255;
30static int ip_local_port_range_min[] = { 1, 1 }; 28static int ip_local_port_range_min[] = { 1, 1 };
31static int ip_local_port_range_max[] = { 65535, 65535 }; 29static int ip_local_port_range_max[] = { 65535, 65535 };
32#endif
33
34struct ipv4_config ipv4_config;
35
36#ifdef CONFIG_SYSCTL
37
38static
39int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
40 void __user *buffer, size_t *lenp, loff_t *ppos)
41{
42 int val = IPV4_DEVCONF_ALL(FORWARDING);
43 int ret;
44
45 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
46
47 if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
48 inet_forward_change();
49
50 return ret;
51}
52
53static int ipv4_sysctl_forward_strategy(ctl_table *table,
54 int __user *name, int nlen,
55 void __user *oldval, size_t __user *oldlenp,
56 void __user *newval, size_t newlen)
57{
58 int *valp = table->data;
59 int new;
60
61 if (!newval || !newlen)
62 return 0;
63
64 if (newlen != sizeof(int))
65 return -EINVAL;
66
67 if (get_user(new, (int __user *)newval))
68 return -EFAULT;
69
70 if (new == *valp)
71 return 0;
72
73 if (oldval && oldlenp) {
74 size_t len;
75
76 if (get_user(len, oldlenp))
77 return -EFAULT;
78
79 if (len) {
80 if (len > table->maxlen)
81 len = table->maxlen;
82 if (copy_to_user(oldval, valp, len))
83 return -EFAULT;
84 if (put_user(len, oldlenp))
85 return -EFAULT;
86 }
87 }
88
89 *valp = new;
90 inet_forward_change();
91 return 1;
92}
93 30
94extern seqlock_t sysctl_port_range_lock; 31extern seqlock_t sysctl_port_range_lock;
95extern int sysctl_local_port_range[2]; 32extern int sysctl_local_port_range[2];
@@ -191,7 +128,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
191 128
192 tcp_get_default_congestion_control(val); 129 tcp_get_default_congestion_control(val);
193 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); 130 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
194 if (ret == 0 && newval && newlen) 131 if (ret == 1 && newval && newlen)
195 ret = tcp_set_default_congestion_control(val); 132 ret = tcp_set_default_congestion_control(val);
196 return ret; 133 return ret;
197} 134}
@@ -248,7 +185,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
248 185
249 tcp_get_available_congestion_control(tbl.data, tbl.maxlen); 186 tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
250 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); 187 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
251 if (ret == 0 && newval && newlen) 188 if (ret == 1 && newval && newlen)
252 ret = tcp_set_allowed_congestion_control(tbl.data); 189 ret = tcp_set_allowed_congestion_control(tbl.data);
253 kfree(tbl.data); 190 kfree(tbl.data);
254 191
@@ -256,7 +193,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
256 193
257} 194}
258 195
259ctl_table ipv4_table[] = { 196static struct ctl_table ipv4_table[] = {
260 { 197 {
261 .ctl_name = NET_IPV4_TCP_TIMESTAMPS, 198 .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
262 .procname = "tcp_timestamps", 199 .procname = "tcp_timestamps",
@@ -290,15 +227,6 @@ ctl_table ipv4_table[] = {
290 .proc_handler = &proc_dointvec 227 .proc_handler = &proc_dointvec
291 }, 228 },
292 { 229 {
293 .ctl_name = NET_IPV4_FORWARD,
294 .procname = "ip_forward",
295 .data = &IPV4_DEVCONF_ALL(FORWARDING),
296 .maxlen = sizeof(int),
297 .mode = 0644,
298 .proc_handler = &ipv4_sysctl_forward,
299 .strategy = &ipv4_sysctl_forward_strategy
300 },
301 {
302 .ctl_name = NET_IPV4_DEFAULT_TTL, 230 .ctl_name = NET_IPV4_DEFAULT_TTL,
303 .procname = "ip_default_ttl", 231 .procname = "ip_default_ttl",
304 .data = &sysctl_ip_default_ttl, 232 .data = &sysctl_ip_default_ttl,
@@ -356,22 +284,6 @@ ctl_table ipv4_table[] = {
356 .proc_handler = &proc_dointvec 284 .proc_handler = &proc_dointvec
357 }, 285 },
358 { 286 {
359 .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
360 .procname = "ipfrag_high_thresh",
361 .data = &ip4_frags_ctl.high_thresh,
362 .maxlen = sizeof(int),
363 .mode = 0644,
364 .proc_handler = &proc_dointvec
365 },
366 {
367 .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
368 .procname = "ipfrag_low_thresh",
369 .data = &ip4_frags_ctl.low_thresh,
370 .maxlen = sizeof(int),
371 .mode = 0644,
372 .proc_handler = &proc_dointvec
373 },
374 {
375 .ctl_name = NET_IPV4_DYNADDR, 287 .ctl_name = NET_IPV4_DYNADDR,
376 .procname = "ip_dynaddr", 288 .procname = "ip_dynaddr",
377 .data = &sysctl_ip_dynaddr, 289 .data = &sysctl_ip_dynaddr,
@@ -380,15 +292,6 @@ ctl_table ipv4_table[] = {
380 .proc_handler = &proc_dointvec 292 .proc_handler = &proc_dointvec
381 }, 293 },
382 { 294 {
383 .ctl_name = NET_IPV4_IPFRAG_TIME,
384 .procname = "ipfrag_time",
385 .data = &ip4_frags_ctl.timeout,
386 .maxlen = sizeof(int),
387 .mode = 0644,
388 .proc_handler = &proc_dointvec_jiffies,
389 .strategy = &sysctl_jiffies
390 },
391 {
392 .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, 295 .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
393 .procname = "tcp_keepalive_time", 296 .procname = "tcp_keepalive_time",
394 .data = &sysctl_tcp_keepalive_time, 297 .data = &sysctl_tcp_keepalive_time,
@@ -731,23 +634,6 @@ ctl_table ipv4_table[] = {
731 .proc_handler = &proc_dointvec 634 .proc_handler = &proc_dointvec
732 }, 635 },
733 { 636 {
734 .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
735 .procname = "ipfrag_secret_interval",
736 .data = &ip4_frags_ctl.secret_interval,
737 .maxlen = sizeof(int),
738 .mode = 0644,
739 .proc_handler = &proc_dointvec_jiffies,
740 .strategy = &sysctl_jiffies
741 },
742 {
743 .procname = "ipfrag_max_dist",
744 .data = &sysctl_ipfrag_max_dist,
745 .maxlen = sizeof(int),
746 .mode = 0644,
747 .proc_handler = &proc_dointvec_minmax,
748 .extra1 = &zero
749 },
750 {
751 .ctl_name = NET_TCP_NO_METRICS_SAVE, 637 .ctl_name = NET_TCP_NO_METRICS_SAVE,
752 .procname = "tcp_no_metrics_save", 638 .procname = "tcp_no_metrics_save",
753 .data = &sysctl_tcp_nometrics_save, 639 .data = &sysctl_tcp_nometrics_save,
@@ -885,9 +771,52 @@ ctl_table ipv4_table[] = {
885 .mode = 0644, 771 .mode = 0644,
886 .proc_handler = &proc_dointvec, 772 .proc_handler = &proc_dointvec,
887 }, 773 },
774 {
775 .ctl_name = CTL_UNNUMBERED,
776 .procname = "udp_mem",
777 .data = &sysctl_udp_mem,
778 .maxlen = sizeof(sysctl_udp_mem),
779 .mode = 0644,
780 .proc_handler = &proc_dointvec_minmax,
781 .strategy = &sysctl_intvec,
782 .extra1 = &zero
783 },
784 {
785 .ctl_name = CTL_UNNUMBERED,
786 .procname = "udp_rmem_min",
787 .data = &sysctl_udp_rmem_min,
788 .maxlen = sizeof(sysctl_udp_rmem_min),
789 .mode = 0644,
790 .proc_handler = &proc_dointvec_minmax,
791 .strategy = &sysctl_intvec,
792 .extra1 = &zero
793 },
794 {
795 .ctl_name = CTL_UNNUMBERED,
796 .procname = "udp_wmem_min",
797 .data = &sysctl_udp_wmem_min,
798 .maxlen = sizeof(sysctl_udp_wmem_min),
799 .mode = 0644,
800 .proc_handler = &proc_dointvec_minmax,
801 .strategy = &sysctl_intvec,
802 .extra1 = &zero
803 },
888 { .ctl_name = 0 } 804 { .ctl_name = 0 }
889}; 805};
890 806
891#endif /* CONFIG_SYSCTL */ 807struct ctl_path net_ipv4_ctl_path[] = {
808 { .procname = "net", .ctl_name = CTL_NET, },
809 { .procname = "ipv4", .ctl_name = NET_IPV4, },
810 { },
811};
812EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
813
814static __init int sysctl_ipv4_init(void)
815{
816 struct ctl_table_header *hdr;
817
818 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
819 return hdr == NULL ? -ENOMEM : 0;
820}
892 821
893EXPORT_SYMBOL(ipv4_config); 822__initcall(sysctl_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2e6ad6dbba6c..a0d373bd9065 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -254,6 +254,10 @@
254#include <linux/poll.h> 254#include <linux/poll.h>
255#include <linux/init.h> 255#include <linux/init.h>
256#include <linux/fs.h> 256#include <linux/fs.h>
257#include <linux/skbuff.h>
258#include <linux/splice.h>
259#include <linux/net.h>
260#include <linux/socket.h>
257#include <linux/random.h> 261#include <linux/random.h>
258#include <linux/bootmem.h> 262#include <linux/bootmem.h>
259#include <linux/cache.h> 263#include <linux/cache.h>
@@ -265,6 +269,7 @@
265#include <net/xfrm.h> 269#include <net/xfrm.h>
266#include <net/ip.h> 270#include <net/ip.h>
267#include <net/netdma.h> 271#include <net/netdma.h>
272#include <net/sock.h>
268 273
269#include <asm/uaccess.h> 274#include <asm/uaccess.h>
270#include <asm/ioctls.h> 275#include <asm/ioctls.h>
@@ -292,9 +297,18 @@ EXPORT_SYMBOL(tcp_memory_allocated);
292EXPORT_SYMBOL(tcp_sockets_allocated); 297EXPORT_SYMBOL(tcp_sockets_allocated);
293 298
294/* 299/*
300 * TCP splice context
301 */
302struct tcp_splice_state {
303 struct pipe_inode_info *pipe;
304 size_t len;
305 unsigned int flags;
306};
307
308/*
295 * Pressure flag: try to collapse. 309 * Pressure flag: try to collapse.
296 * Technical note: it is used by multiple contexts non atomically. 310 * Technical note: it is used by multiple contexts non atomically.
297 * All the sk_stream_mem_schedule() is of this nature: accounting 311 * All the __sk_mem_schedule() is of this nature: accounting
298 * is strict, actions are advisory and have some latency. 312 * is strict, actions are advisory and have some latency.
299 */ 313 */
300int tcp_memory_pressure __read_mostly; 314int tcp_memory_pressure __read_mostly;
@@ -471,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
471 tcb->sacked = 0; 485 tcb->sacked = 0;
472 skb_header_release(skb); 486 skb_header_release(skb);
473 tcp_add_write_queue_tail(sk, skb); 487 tcp_add_write_queue_tail(sk, skb);
474 sk_charge_skb(sk, skb); 488 sk->sk_wmem_queued += skb->truesize;
489 sk_mem_charge(sk, skb->truesize);
475 if (tp->nonagle & TCP_NAGLE_PUSH) 490 if (tp->nonagle & TCP_NAGLE_PUSH)
476 tp->nonagle &= ~TCP_NAGLE_PUSH; 491 tp->nonagle &= ~TCP_NAGLE_PUSH;
477} 492}
@@ -482,7 +497,6 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
482 if (flags & MSG_OOB) { 497 if (flags & MSG_OOB) {
483 tp->urg_mode = 1; 498 tp->urg_mode = 1;
484 tp->snd_up = tp->write_seq; 499 tp->snd_up = tp->write_seq;
485 TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
486 } 500 }
487} 501}
488 502
@@ -501,6 +515,145 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
501 } 515 }
502} 516}
503 517
518static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
519 unsigned int offset, size_t len)
520{
521 struct tcp_splice_state *tss = rd_desc->arg.data;
522
523 return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
524}
525
526static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
527{
528 /* Store TCP splice context information in read_descriptor_t. */
529 read_descriptor_t rd_desc = {
530 .arg.data = tss,
531 };
532
533 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
534}
535
536/**
537 * tcp_splice_read - splice data from TCP socket to a pipe
538 * @sock: socket to splice from
539 * @ppos: position (not valid)
540 * @pipe: pipe to splice to
541 * @len: number of bytes to splice
542 * @flags: splice modifier flags
543 *
544 * Description:
545 * Will read pages from given socket and fill them into a pipe.
546 *
547 **/
548ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
549 struct pipe_inode_info *pipe, size_t len,
550 unsigned int flags)
551{
552 struct sock *sk = sock->sk;
553 struct tcp_splice_state tss = {
554 .pipe = pipe,
555 .len = len,
556 .flags = flags,
557 };
558 long timeo;
559 ssize_t spliced;
560 int ret;
561
562 /*
563 * We can't seek on a socket input
564 */
565 if (unlikely(*ppos))
566 return -ESPIPE;
567
568 ret = spliced = 0;
569
570 lock_sock(sk);
571
572 timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
573 while (tss.len) {
574 ret = __tcp_splice_read(sk, &tss);
575 if (ret < 0)
576 break;
577 else if (!ret) {
578 if (spliced)
579 break;
580 if (flags & SPLICE_F_NONBLOCK) {
581 ret = -EAGAIN;
582 break;
583 }
584 if (sock_flag(sk, SOCK_DONE))
585 break;
586 if (sk->sk_err) {
587 ret = sock_error(sk);
588 break;
589 }
590 if (sk->sk_shutdown & RCV_SHUTDOWN)
591 break;
592 if (sk->sk_state == TCP_CLOSE) {
593 /*
594 * This occurs when user tries to read
595 * from never connected socket.
596 */
597 if (!sock_flag(sk, SOCK_DONE))
598 ret = -ENOTCONN;
599 break;
600 }
601 if (!timeo) {
602 ret = -EAGAIN;
603 break;
604 }
605 sk_wait_data(sk, &timeo);
606 if (signal_pending(current)) {
607 ret = sock_intr_errno(timeo);
608 break;
609 }
610 continue;
611 }
612 tss.len -= ret;
613 spliced += ret;
614
615 release_sock(sk);
616 lock_sock(sk);
617
618 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
619 (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
620 signal_pending(current))
621 break;
622 }
623
624 release_sock(sk);
625
626 if (spliced)
627 return spliced;
628
629 return ret;
630}
631
632struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
633{
634 struct sk_buff *skb;
635
636 /* The TCP header must be at least 32-bit aligned. */
637 size = ALIGN(size, 4);
638
639 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
640 if (skb) {
641 if (sk_wmem_schedule(sk, skb->truesize)) {
642 /*
643 * Make sure that we have exactly size bytes
644 * available to the caller, no more, no less.
645 */
646 skb_reserve(skb, skb_tailroom(skb) - size);
647 return skb;
648 }
649 __kfree_skb(skb);
650 } else {
651 sk->sk_prot->enter_memory_pressure();
652 sk_stream_moderate_sndbuf(sk);
653 }
654 return NULL;
655}
656
504static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, 657static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
505 size_t psize, int flags) 658 size_t psize, int flags)
506{ 659{
@@ -537,8 +690,7 @@ new_segment:
537 if (!sk_stream_memory_free(sk)) 690 if (!sk_stream_memory_free(sk))
538 goto wait_for_sndbuf; 691 goto wait_for_sndbuf;
539 692
540 skb = sk_stream_alloc_pskb(sk, 0, 0, 693 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
541 sk->sk_allocation);
542 if (!skb) 694 if (!skb)
543 goto wait_for_memory; 695 goto wait_for_memory;
544 696
@@ -555,7 +707,7 @@ new_segment:
555 tcp_mark_push(tp, skb); 707 tcp_mark_push(tp, skb);
556 goto new_segment; 708 goto new_segment;
557 } 709 }
558 if (!sk_stream_wmem_schedule(sk, copy)) 710 if (!sk_wmem_schedule(sk, copy))
559 goto wait_for_memory; 711 goto wait_for_memory;
560 712
561 if (can_coalesce) { 713 if (can_coalesce) {
@@ -569,7 +721,7 @@ new_segment:
569 skb->data_len += copy; 721 skb->data_len += copy;
570 skb->truesize += copy; 722 skb->truesize += copy;
571 sk->sk_wmem_queued += copy; 723 sk->sk_wmem_queued += copy;
572 sk->sk_forward_alloc -= copy; 724 sk_mem_charge(sk, copy);
573 skb->ip_summed = CHECKSUM_PARTIAL; 725 skb->ip_summed = CHECKSUM_PARTIAL;
574 tp->write_seq += copy; 726 tp->write_seq += copy;
575 TCP_SKB_CB(skb)->end_seq += copy; 727 TCP_SKB_CB(skb)->end_seq += copy;
@@ -718,8 +870,8 @@ new_segment:
718 if (!sk_stream_memory_free(sk)) 870 if (!sk_stream_memory_free(sk))
719 goto wait_for_sndbuf; 871 goto wait_for_sndbuf;
720 872
721 skb = sk_stream_alloc_pskb(sk, select_size(sk), 873 skb = sk_stream_alloc_skb(sk, select_size(sk),
722 0, sk->sk_allocation); 874 sk->sk_allocation);
723 if (!skb) 875 if (!skb)
724 goto wait_for_memory; 876 goto wait_for_memory;
725 877
@@ -776,7 +928,7 @@ new_segment:
776 if (copy > PAGE_SIZE - off) 928 if (copy > PAGE_SIZE - off)
777 copy = PAGE_SIZE - off; 929 copy = PAGE_SIZE - off;
778 930
779 if (!sk_stream_wmem_schedule(sk, copy)) 931 if (!sk_wmem_schedule(sk, copy))
780 goto wait_for_memory; 932 goto wait_for_memory;
781 933
782 if (!page) { 934 if (!page) {
@@ -867,7 +1019,7 @@ do_fault:
867 * reset, where we can be unlinking the send_head. 1019 * reset, where we can be unlinking the send_head.
868 */ 1020 */
869 tcp_check_send_head(sk, skb); 1021 tcp_check_send_head(sk, skb);
870 sk_stream_free_skb(sk, skb); 1022 sk_wmem_free_skb(sk, skb);
871 } 1023 }
872 1024
873do_error: 1025do_error:
@@ -1500,6 +1652,41 @@ recv_urg:
1500 goto out; 1652 goto out;
1501} 1653}
1502 1654
1655void tcp_set_state(struct sock *sk, int state)
1656{
1657 int oldstate = sk->sk_state;
1658
1659 switch (state) {
1660 case TCP_ESTABLISHED:
1661 if (oldstate != TCP_ESTABLISHED)
1662 TCP_INC_STATS(TCP_MIB_CURRESTAB);
1663 break;
1664
1665 case TCP_CLOSE:
1666 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1667 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1668
1669 sk->sk_prot->unhash(sk);
1670 if (inet_csk(sk)->icsk_bind_hash &&
1671 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1672 inet_put_port(&tcp_hashinfo, sk);
1673 /* fall through */
1674 default:
1675 if (oldstate==TCP_ESTABLISHED)
1676 TCP_DEC_STATS(TCP_MIB_CURRESTAB);
1677 }
1678
1679 /* Change state AFTER socket is unhashed to avoid closed
1680 * socket sitting in hash tables.
1681 */
1682 sk->sk_state = state;
1683
1684#ifdef STATE_TRACE
1685 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1686#endif
1687}
1688EXPORT_SYMBOL_GPL(tcp_set_state);
1689
1503/* 1690/*
1504 * State processing on a close. This implements the state shift for 1691 * State processing on a close. This implements the state shift for
1505 * sending our FIN frame. Note that we only send a FIN for some 1692 * sending our FIN frame. Note that we only send a FIN for some
@@ -1586,7 +1773,7 @@ void tcp_close(struct sock *sk, long timeout)
1586 __kfree_skb(skb); 1773 __kfree_skb(skb);
1587 } 1774 }
1588 1775
1589 sk_stream_mem_reclaim(sk); 1776 sk_mem_reclaim(sk);
1590 1777
1591 /* As outlined in RFC 2525, section 2.17, we send a RST here because 1778 /* As outlined in RFC 2525, section 2.17, we send a RST here because
1592 * data was lost. To witness the awful effects of the old behavior of 1779 * data was lost. To witness the awful effects of the old behavior of
@@ -1689,7 +1876,7 @@ adjudge_to_death:
1689 } 1876 }
1690 } 1877 }
1691 if (sk->sk_state != TCP_CLOSE) { 1878 if (sk->sk_state != TCP_CLOSE) {
1692 sk_stream_mem_reclaim(sk); 1879 sk_mem_reclaim(sk);
1693 if (tcp_too_many_orphans(sk, 1880 if (tcp_too_many_orphans(sk,
1694 atomic_read(sk->sk_prot->orphan_count))) { 1881 atomic_read(sk->sk_prot->orphan_count))) {
1695 if (net_ratelimit()) 1882 if (net_ratelimit())
@@ -2411,7 +2598,6 @@ void tcp_done(struct sock *sk)
2411} 2598}
2412EXPORT_SYMBOL_GPL(tcp_done); 2599EXPORT_SYMBOL_GPL(tcp_done);
2413 2600
2414extern void __skb_cb_too_small_for_tcp(int, int);
2415extern struct tcp_congestion_ops tcp_reno; 2601extern struct tcp_congestion_ops tcp_reno;
2416 2602
2417static __initdata unsigned long thash_entries; 2603static __initdata unsigned long thash_entries;
@@ -2430,9 +2616,7 @@ void __init tcp_init(void)
2430 unsigned long limit; 2616 unsigned long limit;
2431 int order, i, max_share; 2617 int order, i, max_share;
2432 2618
2433 if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) 2619 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
2434 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2435 sizeof(skb->cb));
2436 2620
2437 tcp_hashinfo.bind_bucket_cachep = 2621 tcp_hashinfo.bind_bucket_cachep =
2438 kmem_cache_create("tcp_bind_bucket", 2622 kmem_cache_create("tcp_bind_bucket",
@@ -2453,14 +2637,14 @@ void __init tcp_init(void)
2453 0, 2637 0,
2454 &tcp_hashinfo.ehash_size, 2638 &tcp_hashinfo.ehash_size,
2455 NULL, 2639 NULL,
2456 0); 2640 thash_entries ? 0 : 512 * 1024);
2457 tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; 2641 tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
2458 for (i = 0; i < tcp_hashinfo.ehash_size; i++) { 2642 for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
2459 rwlock_init(&tcp_hashinfo.ehash[i].lock);
2460 INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); 2643 INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
2461 INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); 2644 INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
2462 } 2645 }
2463 2646 if (inet_ehash_locks_alloc(&tcp_hashinfo))
2647 panic("TCP: failed to alloc ehash_locks");
2464 tcp_hashinfo.bhash = 2648 tcp_hashinfo.bhash =
2465 alloc_large_system_hash("TCP bind", 2649 alloc_large_system_hash("TCP bind",
2466 sizeof(struct inet_bind_hashbucket), 2650 sizeof(struct inet_bind_hashbucket),
@@ -2509,11 +2693,11 @@ void __init tcp_init(void)
2509 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); 2693 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
2510 max_share = min(4UL*1024*1024, limit); 2694 max_share = min(4UL*1024*1024, limit);
2511 2695
2512 sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM; 2696 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
2513 sysctl_tcp_wmem[1] = 16*1024; 2697 sysctl_tcp_wmem[1] = 16*1024;
2514 sysctl_tcp_wmem[2] = max(64*1024, max_share); 2698 sysctl_tcp_wmem[2] = max(64*1024, max_share);
2515 2699
2516 sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM; 2700 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
2517 sysctl_tcp_rmem[1] = 87380; 2701 sysctl_tcp_rmem[1] = 87380;
2518 sysctl_tcp_rmem[2] = max(87380, max_share); 2702 sysctl_tcp_rmem[2] = max(87380, max_share);
2519 2703
@@ -2532,6 +2716,7 @@ EXPORT_SYMBOL(tcp_poll);
2532EXPORT_SYMBOL(tcp_read_sock); 2716EXPORT_SYMBOL(tcp_read_sock);
2533EXPORT_SYMBOL(tcp_recvmsg); 2717EXPORT_SYMBOL(tcp_recvmsg);
2534EXPORT_SYMBOL(tcp_sendmsg); 2718EXPORT_SYMBOL(tcp_sendmsg);
2719EXPORT_SYMBOL(tcp_splice_read);
2535EXPORT_SYMBOL(tcp_sendpage); 2720EXPORT_SYMBOL(tcp_sendpage);
2536EXPORT_SYMBOL(tcp_setsockopt); 2721EXPORT_SYMBOL(tcp_setsockopt);
2537EXPORT_SYMBOL(tcp_shutdown); 2722EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5dba0fc8f579..5212ed9b0c98 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
136 ca->cnt = 1; 136 ca->cnt = 1;
137} 137}
138 138
139static void bictcp_cong_avoid(struct sock *sk, u32 ack, 139static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
140 u32 in_flight, int data_acked)
141{ 140{
142 struct tcp_sock *tp = tcp_sk(sk); 141 struct tcp_sock *tp = tcp_sk(sk);
143 struct bictcp *ca = inet_csk_ca(sk); 142 struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 55fca1820c34..3a6be23d222f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -274,6 +274,27 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
274 return err; 274 return err;
275} 275}
276 276
277/* RFC2861 Check whether we are limited by application or congestion window
278 * This is the inverse of cwnd check in tcp_tso_should_defer
279 */
280int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
281{
282 const struct tcp_sock *tp = tcp_sk(sk);
283 u32 left;
284
285 if (in_flight >= tp->snd_cwnd)
286 return 1;
287
288 if (!sk_can_gso(sk))
289 return 0;
290
291 left = tp->snd_cwnd - in_flight;
292 if (sysctl_tcp_tso_win_divisor)
293 return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
294 else
295 return left <= tcp_max_burst(tp);
296}
297EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
277 298
278/* 299/*
279 * Slow start is used when congestion window is less than slow start 300 * Slow start is used when congestion window is less than slow start
@@ -324,7 +345,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
324/* This is Jacobson's slow start and congestion avoidance. 345/* This is Jacobson's slow start and congestion avoidance.
325 * SIGCOMM '88, p. 328. 346 * SIGCOMM '88, p. 328.
326 */ 347 */
327void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) 348void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
328{ 349{
329 struct tcp_sock *tp = tcp_sk(sk); 350 struct tcp_sock *tp = tcp_sk(sk);
330 351
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 80bd084a9f91..3aa0b23c1ea0 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
246 ca->cnt = 1; 246 ca->cnt = 1;
247} 247}
248 248
249static void bictcp_cong_avoid(struct sock *sk, u32 ack, 249static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
250 u32 in_flight, int data_acked)
251{ 250{
252 struct tcp_sock *tp = tcp_sk(sk); 251 struct tcp_sock *tp = tcp_sk(sk);
253 struct bictcp *ca = inet_csk_ca(sk); 252 struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 14a073d8b60f..8b6caaf75bb9 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk)
109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); 109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
110} 110}
111 111
112static void hstcp_cong_avoid(struct sock *sk, u32 adk, 112static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight)
113 u32 in_flight, int data_acked)
114{ 113{
115 struct tcp_sock *tp = tcp_sk(sk); 114 struct tcp_sock *tp = tcp_sk(sk);
116 struct hstcp *ca = inet_csk_ca(sk); 115 struct hstcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 5215691f2760..af99776146ff 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
225 return max((tp->snd_cwnd * ca->beta) >> 7, 2U); 225 return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
226} 226}
227 227
228static void htcp_cong_avoid(struct sock *sk, u32 ack, 228static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
229 u32 in_flight, int data_acked)
230{ 229{
231 struct tcp_sock *tp = tcp_sk(sk); 230 struct tcp_sock *tp = tcp_sk(sk);
232 struct htcp *ca = inet_csk_ca(sk); 231 struct htcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index b3e55cf56171..44618b675916 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds)
85 * o Give cwnd a new value based on the model proposed 85 * o Give cwnd a new value based on the model proposed
86 * o remember increments <1 86 * o remember increments <1
87 */ 87 */
88static void hybla_cong_avoid(struct sock *sk, u32 ack, 88static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
89 u32 in_flight, int flag)
90{ 89{
91 struct tcp_sock *tp = tcp_sk(sk); 90 struct tcp_sock *tp = tcp_sk(sk);
92 struct hybla *ca = inet_csk_ca(sk); 91 struct hybla *ca = inet_csk_ca(sk);
@@ -103,7 +102,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack,
103 return; 102 return;
104 103
105 if (!ca->hybla_en) 104 if (!ca->hybla_en)
106 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 105 return tcp_reno_cong_avoid(sk, ack, in_flight);
107 106
108 if (ca->rho == 0) 107 if (ca->rho == 0)
109 hybla_recalc_param(sk); 108 hybla_recalc_param(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 64f1cbaf96e8..1eba160b72dc 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -256,8 +256,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
256/* 256/*
257 * Increase window in response to successful acknowledgment. 257 * Increase window in response to successful acknowledgment.
258 */ 258 */
259static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, 259static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
260 u32 in_flight, int flag)
261{ 260{
262 struct tcp_sock *tp = tcp_sk(sk); 261 struct tcp_sock *tp = tcp_sk(sk);
263 struct illinois *ca = inet_csk_ca(sk); 262 struct illinois *ca = inet_csk_ca(sk);
@@ -298,7 +297,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
298 struct illinois *ca = inet_csk_ca(sk); 297 struct illinois *ca = inet_csk_ca(sk);
299 298
300 /* Multiplicative decrease */ 299 /* Multiplicative decrease */
301 return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U); 300 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
302} 301}
303 302
304 303
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69d8c38ccd39..19c449f62672 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,6 +105,7 @@ int sysctl_tcp_abc __read_mostly;
105#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 105#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
106#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 106#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
107#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ 107#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
108#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
108 109
109#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 110#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
110#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 111#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -120,8 +121,7 @@ int sysctl_tcp_abc __read_mostly;
120/* Adapt the MSS value used to make delayed ack decision to the 121/* Adapt the MSS value used to make delayed ack decision to the
121 * real world. 122 * real world.
122 */ 123 */
123static void tcp_measure_rcv_mss(struct sock *sk, 124static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
124 const struct sk_buff *skb)
125{ 125{
126 struct inet_connection_sock *icsk = inet_csk(sk); 126 struct inet_connection_sock *icsk = inet_csk(sk);
127 const unsigned int lss = icsk->icsk_ack.last_seg_size; 127 const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -132,7 +132,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
132 /* skb->len may jitter because of SACKs, even if peer 132 /* skb->len may jitter because of SACKs, even if peer
133 * sends good full-sized frames. 133 * sends good full-sized frames.
134 */ 134 */
135 len = skb_shinfo(skb)->gso_size ?: skb->len; 135 len = skb_shinfo(skb)->gso_size ? : skb->len;
136 if (len >= icsk->icsk_ack.rcv_mss) { 136 if (len >= icsk->icsk_ack.rcv_mss) {
137 icsk->icsk_ack.rcv_mss = len; 137 icsk->icsk_ack.rcv_mss = len;
138 } else { 138 } else {
@@ -172,8 +172,8 @@ static void tcp_incr_quickack(struct sock *sk)
172 struct inet_connection_sock *icsk = inet_csk(sk); 172 struct inet_connection_sock *icsk = inet_csk(sk);
173 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); 173 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
174 174
175 if (quickacks==0) 175 if (quickacks == 0)
176 quickacks=2; 176 quickacks = 2;
177 if (quickacks > icsk->icsk_ack.quick) 177 if (quickacks > icsk->icsk_ack.quick)
178 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); 178 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
179} 179}
@@ -198,7 +198,7 @@ static inline int tcp_in_quickack_mode(const struct sock *sk)
198 198
199static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) 199static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
200{ 200{
201 if (tp->ecn_flags&TCP_ECN_OK) 201 if (tp->ecn_flags & TCP_ECN_OK)
202 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 202 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
203} 203}
204 204
@@ -215,7 +215,7 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
215 215
216static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) 216static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
217{ 217{
218 if (tp->ecn_flags&TCP_ECN_OK) { 218 if (tp->ecn_flags & TCP_ECN_OK) {
219 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) 219 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
220 tp->ecn_flags |= TCP_ECN_DEMAND_CWR; 220 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
221 /* Funny extension: if ECT is not set on a segment, 221 /* Funny extension: if ECT is not set on a segment,
@@ -228,19 +228,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
228 228
229static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) 229static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
230{ 230{
231 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) 231 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
232 tp->ecn_flags &= ~TCP_ECN_OK; 232 tp->ecn_flags &= ~TCP_ECN_OK;
233} 233}
234 234
235static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) 235static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
236{ 236{
237 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) 237 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
238 tp->ecn_flags &= ~TCP_ECN_OK; 238 tp->ecn_flags &= ~TCP_ECN_OK;
239} 239}
240 240
241static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) 241static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
242{ 242{
243 if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) 243 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
244 return 1; 244 return 1;
245 return 0; 245 return 0;
246} 246}
@@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
289{ 289{
290 struct tcp_sock *tp = tcp_sk(sk); 290 struct tcp_sock *tp = tcp_sk(sk);
291 /* Optimize this! */ 291 /* Optimize this! */
292 int truesize = tcp_win_from_space(skb->truesize)/2; 292 int truesize = tcp_win_from_space(skb->truesize) >> 1;
293 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; 293 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
294 294
295 while (tp->rcv_ssthresh <= window) { 295 while (tp->rcv_ssthresh <= window) {
296 if (truesize <= skb->len) 296 if (truesize <= skb->len)
@@ -302,8 +302,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
302 return 0; 302 return 0;
303} 303}
304 304
305static void tcp_grow_window(struct sock *sk, 305static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
306 struct sk_buff *skb)
307{ 306{
308 struct tcp_sock *tp = tcp_sk(sk); 307 struct tcp_sock *tp = tcp_sk(sk);
309 308
@@ -317,12 +316,13 @@ static void tcp_grow_window(struct sock *sk,
317 * will fit to rcvbuf in future. 316 * will fit to rcvbuf in future.
318 */ 317 */
319 if (tcp_win_from_space(skb->truesize) <= skb->len) 318 if (tcp_win_from_space(skb->truesize) <= skb->len)
320 incr = 2*tp->advmss; 319 incr = 2 * tp->advmss;
321 else 320 else
322 incr = __tcp_grow_window(sk, skb); 321 incr = __tcp_grow_window(sk, skb);
323 322
324 if (incr) { 323 if (incr) {
325 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); 324 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
325 tp->window_clamp);
326 inet_csk(sk)->icsk_ack.quick |= 1; 326 inet_csk(sk)->icsk_ack.quick |= 1;
327 } 327 }
328 } 328 }
@@ -397,10 +397,9 @@ static void tcp_clamp_window(struct sock *sk)
397 sysctl_tcp_rmem[2]); 397 sysctl_tcp_rmem[2]);
398 } 398 }
399 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) 399 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
400 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 400 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
401} 401}
402 402
403
404/* Initialize RCV_MSS value. 403/* Initialize RCV_MSS value.
405 * RCV_MSS is an our guess about MSS used by the peer. 404 * RCV_MSS is an our guess about MSS used by the peer.
406 * We haven't any direct information about the MSS. 405 * We haven't any direct information about the MSS.
@@ -413,7 +412,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
413 struct tcp_sock *tp = tcp_sk(sk); 412 struct tcp_sock *tp = tcp_sk(sk);
414 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 413 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
415 414
416 hint = min(hint, tp->rcv_wnd/2); 415 hint = min(hint, tp->rcv_wnd / 2);
417 hint = min(hint, TCP_MIN_RCVMSS); 416 hint = min(hint, TCP_MIN_RCVMSS);
418 hint = max(hint, TCP_MIN_MSS); 417 hint = max(hint, TCP_MIN_MSS);
419 418
@@ -470,16 +469,15 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
470 goto new_measure; 469 goto new_measure;
471 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) 470 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
472 return; 471 return;
473 tcp_rcv_rtt_update(tp, 472 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
474 jiffies - tp->rcv_rtt_est.time,
475 1);
476 473
477new_measure: 474new_measure:
478 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; 475 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
479 tp->rcv_rtt_est.time = tcp_time_stamp; 476 tp->rcv_rtt_est.time = tcp_time_stamp;
480} 477}
481 478
482static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) 479static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
480 const struct sk_buff *skb)
483{ 481{
484 struct tcp_sock *tp = tcp_sk(sk); 482 struct tcp_sock *tp = tcp_sk(sk);
485 if (tp->rx_opt.rcv_tsecr && 483 if (tp->rx_opt.rcv_tsecr &&
@@ -502,8 +500,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
502 goto new_measure; 500 goto new_measure;
503 501
504 time = tcp_time_stamp - tp->rcvq_space.time; 502 time = tcp_time_stamp - tp->rcvq_space.time;
505 if (time < (tp->rcv_rtt_est.rtt >> 3) || 503 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
506 tp->rcv_rtt_est.rtt == 0)
507 return; 504 return;
508 505
509 space = 2 * (tp->copied_seq - tp->rcvq_space.seq); 506 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
@@ -579,7 +576,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
579 } else { 576 } else {
580 int m = now - icsk->icsk_ack.lrcvtime; 577 int m = now - icsk->icsk_ack.lrcvtime;
581 578
582 if (m <= TCP_ATO_MIN/2) { 579 if (m <= TCP_ATO_MIN / 2) {
583 /* The fastest case is the first. */ 580 /* The fastest case is the first. */
584 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; 581 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
585 } else if (m < icsk->icsk_ack.ato) { 582 } else if (m < icsk->icsk_ack.ato) {
@@ -591,7 +588,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
591 * restart window, so that we send ACKs quickly. 588 * restart window, so that we send ACKs quickly.
592 */ 589 */
593 tcp_incr_quickack(sk); 590 tcp_incr_quickack(sk);
594 sk_stream_mem_reclaim(sk); 591 sk_mem_reclaim(sk);
595 } 592 }
596 } 593 }
597 icsk->icsk_ack.lrcvtime = now; 594 icsk->icsk_ack.lrcvtime = now;
@@ -608,7 +605,7 @@ static u32 tcp_rto_min(struct sock *sk)
608 u32 rto_min = TCP_RTO_MIN; 605 u32 rto_min = TCP_RTO_MIN;
609 606
610 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) 607 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
611 rto_min = dst->metrics[RTAX_RTO_MIN-1]; 608 rto_min = dst->metrics[RTAX_RTO_MIN - 1];
612 return rto_min; 609 return rto_min;
613} 610}
614 611
@@ -671,14 +668,14 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
671 } 668 }
672 if (after(tp->snd_una, tp->rtt_seq)) { 669 if (after(tp->snd_una, tp->rtt_seq)) {
673 if (tp->mdev_max < tp->rttvar) 670 if (tp->mdev_max < tp->rttvar)
674 tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; 671 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
675 tp->rtt_seq = tp->snd_nxt; 672 tp->rtt_seq = tp->snd_nxt;
676 tp->mdev_max = tcp_rto_min(sk); 673 tp->mdev_max = tcp_rto_min(sk);
677 } 674 }
678 } else { 675 } else {
679 /* no previous measure. */ 676 /* no previous measure. */
680 tp->srtt = m<<3; /* take the measured time to be rtt */ 677 tp->srtt = m << 3; /* take the measured time to be rtt */
681 tp->mdev = m<<1; /* make sure rto = 3*rtt */ 678 tp->mdev = m << 1; /* make sure rto = 3*rtt */
682 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 679 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
683 tp->rtt_seq = tp->snd_nxt; 680 tp->rtt_seq = tp->snd_nxt;
684 } 681 }
@@ -732,7 +729,7 @@ void tcp_update_metrics(struct sock *sk)
732 729
733 dst_confirm(dst); 730 dst_confirm(dst);
734 731
735 if (dst && (dst->flags&DST_HOST)) { 732 if (dst && (dst->flags & DST_HOST)) {
736 const struct inet_connection_sock *icsk = inet_csk(sk); 733 const struct inet_connection_sock *icsk = inet_csk(sk);
737 int m; 734 int m;
738 735
@@ -742,7 +739,7 @@ void tcp_update_metrics(struct sock *sk)
742 * Reset our results. 739 * Reset our results.
743 */ 740 */
744 if (!(dst_metric_locked(dst, RTAX_RTT))) 741 if (!(dst_metric_locked(dst, RTAX_RTT)))
745 dst->metrics[RTAX_RTT-1] = 0; 742 dst->metrics[RTAX_RTT - 1] = 0;
746 return; 743 return;
747 } 744 }
748 745
@@ -754,9 +751,9 @@ void tcp_update_metrics(struct sock *sk)
754 */ 751 */
755 if (!(dst_metric_locked(dst, RTAX_RTT))) { 752 if (!(dst_metric_locked(dst, RTAX_RTT))) {
756 if (m <= 0) 753 if (m <= 0)
757 dst->metrics[RTAX_RTT-1] = tp->srtt; 754 dst->metrics[RTAX_RTT - 1] = tp->srtt;
758 else 755 else
759 dst->metrics[RTAX_RTT-1] -= (m>>3); 756 dst->metrics[RTAX_RTT - 1] -= (m >> 3);
760 } 757 }
761 758
762 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { 759 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
@@ -769,7 +766,7 @@ void tcp_update_metrics(struct sock *sk)
769 m = tp->mdev; 766 m = tp->mdev;
770 767
771 if (m >= dst_metric(dst, RTAX_RTTVAR)) 768 if (m >= dst_metric(dst, RTAX_RTTVAR))
772 dst->metrics[RTAX_RTTVAR-1] = m; 769 dst->metrics[RTAX_RTTVAR - 1] = m;
773 else 770 else
774 dst->metrics[RTAX_RTTVAR-1] -= 771 dst->metrics[RTAX_RTTVAR-1] -=
775 (dst->metrics[RTAX_RTTVAR-1] - m)>>2; 772 (dst->metrics[RTAX_RTTVAR-1] - m)>>2;
@@ -783,7 +780,7 @@ void tcp_update_metrics(struct sock *sk)
783 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; 780 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
784 if (!dst_metric_locked(dst, RTAX_CWND) && 781 if (!dst_metric_locked(dst, RTAX_CWND) &&
785 tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) 782 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
786 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; 783 dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
787 } else if (tp->snd_cwnd > tp->snd_ssthresh && 784 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
788 icsk->icsk_ca_state == TCP_CA_Open) { 785 icsk->icsk_ca_state == TCP_CA_Open) {
789 /* Cong. avoidance phase, cwnd is reliable. */ 786 /* Cong. avoidance phase, cwnd is reliable. */
@@ -863,6 +860,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
863 */ 860 */
864static void tcp_disable_fack(struct tcp_sock *tp) 861static void tcp_disable_fack(struct tcp_sock *tp)
865{ 862{
863 /* RFC3517 uses different metric in lost marker => reset on change */
864 if (tcp_is_fack(tp))
865 tp->lost_skb_hint = NULL;
866 tp->rx_opt.sack_ok &= ~2; 866 tp->rx_opt.sack_ok &= ~2;
867} 867}
868 868
@@ -923,7 +923,7 @@ static void tcp_init_metrics(struct sock *sk)
923 } 923 }
924 if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) { 924 if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
925 tp->mdev = dst_metric(dst, RTAX_RTTVAR); 925 tp->mdev = dst_metric(dst, RTAX_RTTVAR);
926 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); 926 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
927 } 927 }
928 tcp_set_rto(sk); 928 tcp_set_rto(sk);
929 tcp_bound_rto(sk); 929 tcp_bound_rto(sk);
@@ -1112,16 +1112,22 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1112 * 1112 *
1113 * Search retransmitted skbs from write_queue that were sent when snd_nxt was 1113 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
1114 * less than what is now known to be received by the other end (derived from 1114 * less than what is now known to be received by the other end (derived from
1115 * SACK blocks by the caller). Also calculate the lowest snd_nxt among the 1115 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
1116 * remaining retransmitted skbs to avoid some costly processing per ACKs. 1116 * retransmitted skbs to avoid some costly processing per ACKs.
1117 */ 1117 */
1118static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) 1118static void tcp_mark_lost_retrans(struct sock *sk)
1119{ 1119{
1120 const struct inet_connection_sock *icsk = inet_csk(sk);
1120 struct tcp_sock *tp = tcp_sk(sk); 1121 struct tcp_sock *tp = tcp_sk(sk);
1121 struct sk_buff *skb; 1122 struct sk_buff *skb;
1122 int flag = 0;
1123 int cnt = 0; 1123 int cnt = 0;
1124 u32 new_low_seq = tp->snd_nxt; 1124 u32 new_low_seq = tp->snd_nxt;
1125 u32 received_upto = tcp_highest_sack_seq(tp);
1126
1127 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1128 !after(received_upto, tp->lost_retrans_low) ||
1129 icsk->icsk_ca_state != TCP_CA_Recovery)
1130 return;
1125 1131
1126 tcp_for_write_queue(skb, sk) { 1132 tcp_for_write_queue(skb, sk) {
1127 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; 1133 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1149,9 +1155,8 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1149 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1150 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1151 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1152 flag |= FLAG_DATA_SACKED;
1153 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1154 } 1158 }
1159 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1155 } else { 1160 } else {
1156 if (before(ack_seq, new_low_seq)) 1161 if (before(ack_seq, new_low_seq))
1157 new_low_seq = ack_seq; 1162 new_low_seq = ack_seq;
@@ -1161,8 +1166,6 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1161 1166
1162 if (tp->retrans_out) 1167 if (tp->retrans_out)
1163 tp->lost_retrans_low = new_low_seq; 1168 tp->lost_retrans_low = new_low_seq;
1164
1165 return flag;
1166} 1169}
1167 1170
1168static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, 1171static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
@@ -1230,34 +1233,205 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1230 return in_sack; 1233 return in_sack;
1231} 1234}
1232 1235
1236static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1237 int *reord, int dup_sack, int fack_count)
1238{
1239 struct tcp_sock *tp = tcp_sk(sk);
1240 u8 sacked = TCP_SKB_CB(skb)->sacked;
1241 int flag = 0;
1242
1243 /* Account D-SACK for retransmitted packet. */
1244 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1245 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1246 tp->undo_retrans--;
1247 if (sacked & TCPCB_SACKED_ACKED)
1248 *reord = min(fack_count, *reord);
1249 }
1250
1251 /* Nothing to do; acked frame is about to be dropped (was ACKed). */
1252 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1253 return flag;
1254
1255 if (!(sacked & TCPCB_SACKED_ACKED)) {
1256 if (sacked & TCPCB_SACKED_RETRANS) {
1257 /* If the segment is not tagged as lost,
1258 * we do not clear RETRANS, believing
1259 * that retransmission is still in flight.
1260 */
1261 if (sacked & TCPCB_LOST) {
1262 TCP_SKB_CB(skb)->sacked &=
1263 ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1264 tp->lost_out -= tcp_skb_pcount(skb);
1265 tp->retrans_out -= tcp_skb_pcount(skb);
1266
1267 /* clear lost hint */
1268 tp->retransmit_skb_hint = NULL;
1269 }
1270 } else {
1271 if (!(sacked & TCPCB_RETRANS)) {
1272 /* New sack for not retransmitted frame,
1273 * which was in hole. It is reordering.
1274 */
1275 if (before(TCP_SKB_CB(skb)->seq,
1276 tcp_highest_sack_seq(tp)))
1277 *reord = min(fack_count, *reord);
1278
1279 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
1280 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1281 flag |= FLAG_ONLY_ORIG_SACKED;
1282 }
1283
1284 if (sacked & TCPCB_LOST) {
1285 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1286 tp->lost_out -= tcp_skb_pcount(skb);
1287
1288 /* clear lost hint */
1289 tp->retransmit_skb_hint = NULL;
1290 }
1291 }
1292
1293 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
1294 flag |= FLAG_DATA_SACKED;
1295 tp->sacked_out += tcp_skb_pcount(skb);
1296
1297 fack_count += tcp_skb_pcount(skb);
1298
1299 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1300 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1301 before(TCP_SKB_CB(skb)->seq,
1302 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1303 tp->lost_cnt_hint += tcp_skb_pcount(skb);
1304
1305 if (fack_count > tp->fackets_out)
1306 tp->fackets_out = fack_count;
1307
1308 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
1309 tcp_advance_highest_sack(sk, skb);
1310 }
1311
1312 /* D-SACK. We can detect redundant retransmission in S|R and plain R
1313 * frames and clear it. undo_retrans is decreased above, L|R frames
1314 * are accounted above as well.
1315 */
1316 if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
1317 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1318 tp->retrans_out -= tcp_skb_pcount(skb);
1319 tp->retransmit_skb_hint = NULL;
1320 }
1321
1322 return flag;
1323}
1324
1325static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1326 struct tcp_sack_block *next_dup,
1327 u32 start_seq, u32 end_seq,
1328 int dup_sack_in, int *fack_count,
1329 int *reord, int *flag)
1330{
1331 tcp_for_write_queue_from(skb, sk) {
1332 int in_sack = 0;
1333 int dup_sack = dup_sack_in;
1334
1335 if (skb == tcp_send_head(sk))
1336 break;
1337
1338 /* queue is in-order => we can short-circuit the walk early */
1339 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1340 break;
1341
1342 if ((next_dup != NULL) &&
1343 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1344 in_sack = tcp_match_skb_to_sack(sk, skb,
1345 next_dup->start_seq,
1346 next_dup->end_seq);
1347 if (in_sack > 0)
1348 dup_sack = 1;
1349 }
1350
1351 if (in_sack <= 0)
1352 in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
1353 end_seq);
1354 if (unlikely(in_sack < 0))
1355 break;
1356
1357 if (in_sack)
1358 *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
1359 *fack_count);
1360
1361 *fack_count += tcp_skb_pcount(skb);
1362 }
1363 return skb;
1364}
1365
1366/* Avoid all extra work that is being done by sacktag while walking in
1367 * a normal way
1368 */
1369static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1370 u32 skip_to_seq)
1371{
1372 tcp_for_write_queue_from(skb, sk) {
1373 if (skb == tcp_send_head(sk))
1374 break;
1375
1376 if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1377 break;
1378 }
1379 return skb;
1380}
1381
1382static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1383 struct sock *sk,
1384 struct tcp_sack_block *next_dup,
1385 u32 skip_to_seq,
1386 int *fack_count, int *reord,
1387 int *flag)
1388{
1389 if (next_dup == NULL)
1390 return skb;
1391
1392 if (before(next_dup->start_seq, skip_to_seq)) {
1393 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1394 tcp_sacktag_walk(skb, sk, NULL,
1395 next_dup->start_seq, next_dup->end_seq,
1396 1, fack_count, reord, flag);
1397 }
1398
1399 return skb;
1400}
1401
1402static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1403{
1404 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1405}
1406
1233static int 1407static int
1234tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) 1408tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1409 u32 prior_snd_una)
1235{ 1410{
1236 const struct inet_connection_sock *icsk = inet_csk(sk); 1411 const struct inet_connection_sock *icsk = inet_csk(sk);
1237 struct tcp_sock *tp = tcp_sk(sk); 1412 struct tcp_sock *tp = tcp_sk(sk);
1238 unsigned char *ptr = (skb_transport_header(ack_skb) + 1413 unsigned char *ptr = (skb_transport_header(ack_skb) +
1239 TCP_SKB_CB(ack_skb)->sacked); 1414 TCP_SKB_CB(ack_skb)->sacked);
1240 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); 1415 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1241 struct sk_buff *cached_skb; 1416 struct tcp_sack_block sp[4];
1242 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; 1417 struct tcp_sack_block *cache;
1418 struct sk_buff *skb;
1419 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
1420 int used_sacks;
1243 int reord = tp->packets_out; 1421 int reord = tp->packets_out;
1244 int prior_fackets;
1245 u32 highest_sack_end_seq = tp->lost_retrans_low;
1246 int flag = 0; 1422 int flag = 0;
1247 int found_dup_sack = 0; 1423 int found_dup_sack = 0;
1248 int cached_fack_count; 1424 int fack_count;
1249 int i; 1425 int i, j;
1250 int first_sack_index; 1426 int first_sack_index;
1251 int force_one_sack;
1252 1427
1253 if (!tp->sacked_out) { 1428 if (!tp->sacked_out) {
1254 if (WARN_ON(tp->fackets_out)) 1429 if (WARN_ON(tp->fackets_out))
1255 tp->fackets_out = 0; 1430 tp->fackets_out = 0;
1256 tp->highest_sack = tp->snd_una; 1431 tcp_highest_sack_reset(sk);
1257 } 1432 }
1258 prior_fackets = tp->fackets_out;
1259 1433
1260 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, 1434 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
1261 num_sacks, prior_snd_una); 1435 num_sacks, prior_snd_una);
1262 if (found_dup_sack) 1436 if (found_dup_sack)
1263 flag |= FLAG_DSACKING_ACK; 1437 flag |= FLAG_DSACKING_ACK;
@@ -1269,75 +1443,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1269 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) 1443 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1270 return 0; 1444 return 0;
1271 1445
1272 /* SACK fastpath: 1446 if (!tp->packets_out)
1273 * if the only SACK change is the increase of the end_seq of 1447 goto out;
1274 * the first block then only apply that SACK block
1275 * and use retrans queue hinting otherwise slowpath */
1276 force_one_sack = 1;
1277 for (i = 0; i < num_sacks; i++) {
1278 __be32 start_seq = sp[i].start_seq;
1279 __be32 end_seq = sp[i].end_seq;
1280
1281 if (i == 0) {
1282 if (tp->recv_sack_cache[i].start_seq != start_seq)
1283 force_one_sack = 0;
1284 } else {
1285 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
1286 (tp->recv_sack_cache[i].end_seq != end_seq))
1287 force_one_sack = 0;
1288 }
1289 tp->recv_sack_cache[i].start_seq = start_seq;
1290 tp->recv_sack_cache[i].end_seq = end_seq;
1291 }
1292 /* Clear the rest of the cache sack blocks so they won't match mistakenly. */
1293 for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
1294 tp->recv_sack_cache[i].start_seq = 0;
1295 tp->recv_sack_cache[i].end_seq = 0;
1296 }
1297 1448
1449 used_sacks = 0;
1298 first_sack_index = 0; 1450 first_sack_index = 0;
1299 if (force_one_sack) 1451 for (i = 0; i < num_sacks; i++) {
1300 num_sacks = 1; 1452 int dup_sack = !i && found_dup_sack;
1301 else {
1302 int j;
1303 tp->fastpath_skb_hint = NULL;
1304
1305 /* order SACK blocks to allow in order walk of the retrans queue */
1306 for (i = num_sacks-1; i > 0; i--) {
1307 for (j = 0; j < i; j++){
1308 if (after(ntohl(sp[j].start_seq),
1309 ntohl(sp[j+1].start_seq))){
1310 struct tcp_sack_block_wire tmp;
1311
1312 tmp = sp[j];
1313 sp[j] = sp[j+1];
1314 sp[j+1] = tmp;
1315
1316 /* Track where the first SACK block goes to */
1317 if (j == first_sack_index)
1318 first_sack_index = j+1;
1319 }
1320
1321 }
1322 }
1323 }
1324
1325 /* Use SACK fastpath hint if valid */
1326 cached_skb = tp->fastpath_skb_hint;
1327 cached_fack_count = tp->fastpath_cnt_hint;
1328 if (!cached_skb) {
1329 cached_skb = tcp_write_queue_head(sk);
1330 cached_fack_count = 0;
1331 }
1332 1453
1333 for (i=0; i<num_sacks; i++, sp++) { 1454 sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
1334 struct sk_buff *skb; 1455 sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
1335 __u32 start_seq = ntohl(sp->start_seq);
1336 __u32 end_seq = ntohl(sp->end_seq);
1337 int fack_count;
1338 int dup_sack = (found_dup_sack && (i == first_sack_index));
1339 1456
1340 if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { 1457 if (!tcp_is_sackblock_valid(tp, dup_sack,
1458 sp[used_sacks].start_seq,
1459 sp[used_sacks].end_seq)) {
1341 if (dup_sack) { 1460 if (dup_sack) {
1342 if (!tp->undo_marker) 1461 if (!tp->undo_marker)
1343 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); 1462 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
@@ -1346,157 +1465,152 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1346 } else { 1465 } else {
1347 /* Don't count olds caused by ACK reordering */ 1466 /* Don't count olds caused by ACK reordering */
1348 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && 1467 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1349 !after(end_seq, tp->snd_una)) 1468 !after(sp[used_sacks].end_seq, tp->snd_una))
1350 continue; 1469 continue;
1351 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); 1470 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
1352 } 1471 }
1472 if (i == 0)
1473 first_sack_index = -1;
1353 continue; 1474 continue;
1354 } 1475 }
1355 1476
1356 skb = cached_skb; 1477 /* Ignore very old stuff early */
1357 fack_count = cached_fack_count; 1478 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1479 continue;
1358 1480
1359 /* Event "B" in the comment above. */ 1481 used_sacks++;
1360 if (after(end_seq, tp->high_seq)) 1482 }
1361 flag |= FLAG_DATA_LOST;
1362 1483
1363 tcp_for_write_queue_from(skb, sk) { 1484 /* order SACK blocks to allow in order walk of the retrans queue */
1364 int in_sack; 1485 for (i = used_sacks - 1; i > 0; i--) {
1365 u8 sacked; 1486 for (j = 0; j < i; j++) {
1487 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1488 struct tcp_sack_block tmp;
1366 1489
1367 if (skb == tcp_send_head(sk)) 1490 tmp = sp[j];
1368 break; 1491 sp[j] = sp[j + 1];
1492 sp[j + 1] = tmp;
1369 1493
1370 cached_skb = skb; 1494 /* Track where the first SACK block goes to */
1371 cached_fack_count = fack_count; 1495 if (j == first_sack_index)
1372 if (i == first_sack_index) { 1496 first_sack_index = j + 1;
1373 tp->fastpath_skb_hint = skb;
1374 tp->fastpath_cnt_hint = fack_count;
1375 } 1497 }
1498 }
1499 }
1376 1500
1377 /* The retransmission queue is always in order, so 1501 skb = tcp_write_queue_head(sk);
1378 * we can short-circuit the walk early. 1502 fack_count = 0;
1379 */ 1503 i = 0;
1380 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1381 break;
1382 1504
1383 in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); 1505 if (!tp->sacked_out) {
1384 if (in_sack < 0) 1506 /* It's already past, so skip checking against it */
1385 break; 1507 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1508 } else {
1509 cache = tp->recv_sack_cache;
1510 /* Skip empty blocks in at head of the cache */
1511 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1512 !cache->end_seq)
1513 cache++;
1514 }
1386 1515
1387 fack_count += tcp_skb_pcount(skb); 1516 while (i < used_sacks) {
1388 1517 u32 start_seq = sp[i].start_seq;
1389 sacked = TCP_SKB_CB(skb)->sacked; 1518 u32 end_seq = sp[i].end_seq;
1390 1519 int dup_sack = (found_dup_sack && (i == first_sack_index));
1391 /* Account D-SACK for retransmitted packet. */ 1520 struct tcp_sack_block *next_dup = NULL;
1392 if ((dup_sack && in_sack) && 1521
1393 (sacked & TCPCB_RETRANS) && 1522 if (found_dup_sack && ((i + 1) == first_sack_index))
1394 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) 1523 next_dup = &sp[i + 1];
1395 tp->undo_retrans--; 1524
1396 1525 /* Event "B" in the comment above. */
1397 /* The frame is ACKed. */ 1526 if (after(end_seq, tp->high_seq))
1398 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { 1527 flag |= FLAG_DATA_LOST;
1399 if (sacked&TCPCB_RETRANS) {
1400 if ((dup_sack && in_sack) &&
1401 (sacked&TCPCB_SACKED_ACKED))
1402 reord = min(fack_count, reord);
1403 } else {
1404 /* If it was in a hole, we detected reordering. */
1405 if (fack_count < prior_fackets &&
1406 !(sacked&TCPCB_SACKED_ACKED))
1407 reord = min(fack_count, reord);
1408 }
1409 1528
1410 /* Nothing to do; acked frame is about to be dropped. */ 1529 /* Skip too early cached blocks */
1411 continue; 1530 while (tcp_sack_cache_ok(tp, cache) &&
1531 !before(start_seq, cache->end_seq))
1532 cache++;
1533
1534 /* Can skip some work by looking recv_sack_cache? */
1535 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1536 after(end_seq, cache->start_seq)) {
1537
1538 /* Head todo? */
1539 if (before(start_seq, cache->start_seq)) {
1540 skb = tcp_sacktag_skip(skb, sk, start_seq);
1541 skb = tcp_sacktag_walk(skb, sk, next_dup,
1542 start_seq,
1543 cache->start_seq,
1544 dup_sack, &fack_count,
1545 &reord, &flag);
1412 } 1546 }
1413 1547
1414 if (!in_sack) 1548 /* Rest of the block already fully processed? */
1415 continue; 1549 if (!after(end_seq, cache->end_seq))
1550 goto advance_sp;
1416 1551
1417 if (!(sacked&TCPCB_SACKED_ACKED)) { 1552 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1418 if (sacked & TCPCB_SACKED_RETRANS) { 1553 cache->end_seq,
1419 /* If the segment is not tagged as lost, 1554 &fack_count, &reord,
1420 * we do not clear RETRANS, believing 1555 &flag);
1421 * that retransmission is still in flight.
1422 */
1423 if (sacked & TCPCB_LOST) {
1424 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1425 tp->lost_out -= tcp_skb_pcount(skb);
1426 tp->retrans_out -= tcp_skb_pcount(skb);
1427 1556
1428 /* clear lost hint */ 1557 /* ...tail remains todo... */
1429 tp->retransmit_skb_hint = NULL; 1558 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1430 } 1559 /* ...but better entrypoint exists! */
1431 } else { 1560 skb = tcp_highest_sack(sk);
1432 /* New sack for not retransmitted frame, 1561 if (skb == NULL)
1433 * which was in hole. It is reordering. 1562 break;
1434 */ 1563 fack_count = tp->fackets_out;
1435 if (!(sacked & TCPCB_RETRANS) && 1564 cache++;
1436 fack_count < prior_fackets) 1565 goto walk;
1437 reord = min(fack_count, reord); 1566 }
1438 1567
1439 if (sacked & TCPCB_LOST) { 1568 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
1440 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1569 /* Check overlap against next cached too (past this one already) */
1441 tp->lost_out -= tcp_skb_pcount(skb); 1570 cache++;
1571 continue;
1572 }
1442 1573
1443 /* clear lost hint */ 1574 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1444 tp->retransmit_skb_hint = NULL; 1575 skb = tcp_highest_sack(sk);
1445 } 1576 if (skb == NULL)
1446 /* SACK enhanced F-RTO detection. 1577 break;
1447 * Set flag if and only if non-rexmitted 1578 fack_count = tp->fackets_out;
1448 * segments below frto_highmark are 1579 }
1449 * SACKed (RFC4138; Appendix B). 1580 skb = tcp_sacktag_skip(skb, sk, start_seq);
1450 * Clearing correct due to in-order walk
1451 */
1452 if (after(end_seq, tp->frto_highmark)) {
1453 flag &= ~FLAG_ONLY_ORIG_SACKED;
1454 } else {
1455 if (!(sacked & TCPCB_RETRANS))
1456 flag |= FLAG_ONLY_ORIG_SACKED;
1457 }
1458 }
1459 1581
1460 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; 1582walk:
1461 flag |= FLAG_DATA_SACKED; 1583 skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
1462 tp->sacked_out += tcp_skb_pcount(skb); 1584 dup_sack, &fack_count, &reord, &flag);
1463 1585
1464 if (fack_count > tp->fackets_out) 1586advance_sp:
1465 tp->fackets_out = fack_count; 1587 /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
1588 * due to in-order walk
1589 */
1590 if (after(end_seq, tp->frto_highmark))
1591 flag &= ~FLAG_ONLY_ORIG_SACKED;
1466 1592
1467 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) { 1593 i++;
1468 tp->highest_sack = TCP_SKB_CB(skb)->seq; 1594 }
1469 highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
1470 }
1471 } else {
1472 if (dup_sack && (sacked&TCPCB_RETRANS))
1473 reord = min(fack_count, reord);
1474 }
1475 1595
1476 /* D-SACK. We can detect redundant retransmission 1596 /* Clear the head of the cache sack blocks so we can skip it next time */
1477 * in S|R and plain R frames and clear it. 1597 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1478 * undo_retrans is decreased above, L|R frames 1598 tp->recv_sack_cache[i].start_seq = 0;
1479 * are accounted above as well. 1599 tp->recv_sack_cache[i].end_seq = 0;
1480 */
1481 if (dup_sack &&
1482 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1483 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1484 tp->retrans_out -= tcp_skb_pcount(skb);
1485 tp->retransmit_skb_hint = NULL;
1486 }
1487 }
1488 } 1600 }
1601 for (j = 0; j < used_sacks; j++)
1602 tp->recv_sack_cache[i++] = sp[j];
1489 1603
1490 if (tp->retrans_out && 1604 tcp_mark_lost_retrans(sk);
1491 after(highest_sack_end_seq, tp->lost_retrans_low) &&
1492 icsk->icsk_ca_state == TCP_CA_Recovery)
1493 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
1494 1605
1495 tcp_verify_left_out(tp); 1606 tcp_verify_left_out(tp);
1496 1607
1497 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && 1608 if ((reord < tp->fackets_out) &&
1609 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1498 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) 1610 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1499 tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); 1611 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
1612
1613out:
1500 1614
1501#if FASTRETRANS_DEBUG > 0 1615#if FASTRETRANS_DEBUG > 0
1502 BUG_TRAP((int)tp->sacked_out >= 0); 1616 BUG_TRAP((int)tp->sacked_out >= 0);
@@ -1543,10 +1657,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1543 1657
1544 if (acked > 0) { 1658 if (acked > 0) {
1545 /* One ACK acked hole. The rest eat duplicate ACKs. */ 1659 /* One ACK acked hole. The rest eat duplicate ACKs. */
1546 if (acked-1 >= tp->sacked_out) 1660 if (acked - 1 >= tp->sacked_out)
1547 tp->sacked_out = 0; 1661 tp->sacked_out = 0;
1548 else 1662 else
1549 tp->sacked_out -= acked-1; 1663 tp->sacked_out -= acked - 1;
1550 } 1664 }
1551 tcp_check_reno_reordering(sk, acked); 1665 tcp_check_reno_reordering(sk, acked);
1552 tcp_verify_left_out(tp); 1666 tcp_verify_left_out(tp);
@@ -1580,10 +1694,10 @@ int tcp_use_frto(struct sock *sk)
1580 tcp_for_write_queue_from(skb, sk) { 1694 tcp_for_write_queue_from(skb, sk) {
1581 if (skb == tcp_send_head(sk)) 1695 if (skb == tcp_send_head(sk))
1582 break; 1696 break;
1583 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) 1697 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1584 return 0; 1698 return 0;
1585 /* Short-circuit when first non-SACKed skb has been checked */ 1699 /* Short-circuit when first non-SACKed skb has been checked */
1586 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) 1700 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1587 break; 1701 break;
1588 } 1702 }
1589 return 1; 1703 return 1;
@@ -1652,6 +1766,9 @@ void tcp_enter_frto(struct sock *sk)
1652 } 1766 }
1653 tcp_verify_left_out(tp); 1767 tcp_verify_left_out(tp);
1654 1768
1769 /* Too bad if TCP was application limited */
1770 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
1771
1655 /* Earlier loss recovery underway (see RFC4138; Appendix B). 1772 /* Earlier loss recovery underway (see RFC4138; Appendix B).
1656 * The last condition is necessary at least in tp->frto_counter case. 1773 * The last condition is necessary at least in tp->frto_counter case.
1657 */ 1774 */
@@ -1684,11 +1801,13 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1684 tcp_for_write_queue(skb, sk) { 1801 tcp_for_write_queue(skb, sk) {
1685 if (skb == tcp_send_head(sk)) 1802 if (skb == tcp_send_head(sk))
1686 break; 1803 break;
1804
1805 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1687 /* 1806 /*
1688 * Count the retransmission made on RTO correctly (only when 1807 * Count the retransmission made on RTO correctly (only when
1689 * waiting for the first ACK and did not get it)... 1808 * waiting for the first ACK and did not get it)...
1690 */ 1809 */
1691 if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { 1810 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
1692 /* For some reason this R-bit might get cleared? */ 1811 /* For some reason this R-bit might get cleared? */
1693 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) 1812 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1694 tp->retrans_out += tcp_skb_pcount(skb); 1813 tp->retrans_out += tcp_skb_pcount(skb);
@@ -1697,11 +1816,11 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1697 } else { 1816 } else {
1698 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1817 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1699 tp->undo_marker = 0; 1818 tp->undo_marker = 0;
1700 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1819 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1701 } 1820 }
1702 1821
1703 /* Don't lost mark skbs that were fwd transmitted after RTO */ 1822 /* Don't lost mark skbs that were fwd transmitted after RTO */
1704 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && 1823 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
1705 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { 1824 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
1706 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1825 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1707 tp->lost_out += tcp_skb_pcount(skb); 1826 tp->lost_out += tcp_skb_pcount(skb);
@@ -1716,7 +1835,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1716 tp->bytes_acked = 0; 1835 tp->bytes_acked = 0;
1717 1836
1718 tp->reordering = min_t(unsigned int, tp->reordering, 1837 tp->reordering = min_t(unsigned int, tp->reordering,
1719 sysctl_tcp_reordering); 1838 sysctl_tcp_reordering);
1720 tcp_set_ca_state(sk, TCP_CA_Loss); 1839 tcp_set_ca_state(sk, TCP_CA_Loss);
1721 tp->high_seq = tp->frto_highmark; 1840 tp->high_seq = tp->frto_highmark;
1722 TCP_ECN_queue_cwr(tp); 1841 TCP_ECN_queue_cwr(tp);
@@ -1783,7 +1902,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1783 if (skb == tcp_send_head(sk)) 1902 if (skb == tcp_send_head(sk))
1784 break; 1903 break;
1785 1904
1786 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) 1905 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1787 tp->undo_marker = 0; 1906 tp->undo_marker = 0;
1788 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; 1907 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1789 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { 1908 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
@@ -1795,7 +1914,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1795 tcp_verify_left_out(tp); 1914 tcp_verify_left_out(tp);
1796 1915
1797 tp->reordering = min_t(unsigned int, tp->reordering, 1916 tp->reordering = min_t(unsigned int, tp->reordering,
1798 sysctl_tcp_reordering); 1917 sysctl_tcp_reordering);
1799 tcp_set_ca_state(sk, TCP_CA_Loss); 1918 tcp_set_ca_state(sk, TCP_CA_Loss);
1800 tp->high_seq = tp->snd_nxt; 1919 tp->high_seq = tp->snd_nxt;
1801 TCP_ECN_queue_cwr(tp); 1920 TCP_ECN_queue_cwr(tp);
@@ -1803,18 +1922,15 @@ void tcp_enter_loss(struct sock *sk, int how)
1803 tp->frto_counter = 0; 1922 tp->frto_counter = 0;
1804} 1923}
1805 1924
1806static int tcp_check_sack_reneging(struct sock *sk) 1925/* If ACK arrived pointing to a remembered SACK, it means that our
1926 * remembered SACKs do not reflect real state of receiver i.e.
1927 * receiver _host_ is heavily congested (or buggy).
1928 *
1929 * Do processing similar to RTO timeout.
1930 */
1931static int tcp_check_sack_reneging(struct sock *sk, int flag)
1807{ 1932{
1808 struct sk_buff *skb; 1933 if (flag & FLAG_SACK_RENEGING) {
1809
1810 /* If ACK arrived pointing to a remembered SACK,
1811 * it means that our remembered SACKs do not reflect
1812 * real state of receiver i.e.
1813 * receiver _host_ is heavily congested (or buggy).
1814 * Do processing similar to RTO timeout.
1815 */
1816 if ((skb = tcp_write_queue_head(sk)) != NULL &&
1817 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1818 struct inet_connection_sock *icsk = inet_csk(sk); 1934 struct inet_connection_sock *icsk = inet_csk(sk);
1819 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); 1935 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
1820 1936
@@ -1830,7 +1946,27 @@ static int tcp_check_sack_reneging(struct sock *sk)
1830 1946
1831static inline int tcp_fackets_out(struct tcp_sock *tp) 1947static inline int tcp_fackets_out(struct tcp_sock *tp)
1832{ 1948{
1833 return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; 1949 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
1950}
1951
1952/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
1953 * counter when SACK is enabled (without SACK, sacked_out is used for
1954 * that purpose).
1955 *
1956 * Instead, with FACK TCP uses fackets_out that includes both SACKed
1957 * segments up to the highest received SACK block so far and holes in
1958 * between them.
1959 *
1960 * With reordering, holes may still be in flight, so RFC3517 recovery
1961 * uses pure sacked_out (total number of SACKed segments) even though
1962 * it violates the RFC that uses duplicate ACKs, often these are equal
1963 * but when e.g. out-of-window ACKs or packet duplication occurs,
1964 * they differ. Since neither occurs due to loss, TCP should really
1965 * ignore them.
1966 */
1967static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
1968{
1969 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
1834} 1970}
1835 1971
1836static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) 1972static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
@@ -1953,13 +2089,13 @@ static int tcp_time_to_recover(struct sock *sk)
1953 return 1; 2089 return 1;
1954 2090
1955 /* Not-A-Trick#2 : Classic rule... */ 2091 /* Not-A-Trick#2 : Classic rule... */
1956 if (tcp_fackets_out(tp) > tp->reordering) 2092 if (tcp_dupack_heurestics(tp) > tp->reordering)
1957 return 1; 2093 return 1;
1958 2094
1959 /* Trick#3 : when we use RFC2988 timer restart, fast 2095 /* Trick#3 : when we use RFC2988 timer restart, fast
1960 * retransmit can be triggered by timeout of queue head. 2096 * retransmit can be triggered by timeout of queue head.
1961 */ 2097 */
1962 if (tcp_head_timedout(sk)) 2098 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
1963 return 1; 2099 return 1;
1964 2100
1965 /* Trick#4: It is still not OK... But will it be useful to delay 2101 /* Trick#4: It is still not OK... But will it be useful to delay
@@ -1983,17 +2119,18 @@ static int tcp_time_to_recover(struct sock *sk)
1983 * retransmitted past LOST markings in the first place? I'm not fully sure 2119 * retransmitted past LOST markings in the first place? I'm not fully sure
1984 * about undo and end of connection cases, which can cause R without L? 2120 * about undo and end of connection cases, which can cause R without L?
1985 */ 2121 */
1986static void tcp_verify_retransmit_hint(struct tcp_sock *tp, 2122static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
1987 struct sk_buff *skb)
1988{ 2123{
1989 if ((tp->retransmit_skb_hint != NULL) && 2124 if ((tp->retransmit_skb_hint != NULL) &&
1990 before(TCP_SKB_CB(skb)->seq, 2125 before(TCP_SKB_CB(skb)->seq,
1991 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) 2126 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1992 tp->retransmit_skb_hint = NULL; 2127 tp->retransmit_skb_hint = NULL;
1993} 2128}
1994 2129
1995/* Mark head of queue up as lost. */ 2130/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
1996static void tcp_mark_head_lost(struct sock *sk, int packets) 2131 * is against sacked "cnt", otherwise it's against facked "cnt"
2132 */
2133static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
1997{ 2134{
1998 struct tcp_sock *tp = tcp_sk(sk); 2135 struct tcp_sock *tp = tcp_sk(sk);
1999 struct sk_buff *skb; 2136 struct sk_buff *skb;
@@ -2015,8 +2152,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2015 /* this is not the most efficient way to do this... */ 2152 /* this is not the most efficient way to do this... */
2016 tp->lost_skb_hint = skb; 2153 tp->lost_skb_hint = skb;
2017 tp->lost_cnt_hint = cnt; 2154 tp->lost_cnt_hint = cnt;
2018 cnt += tcp_skb_pcount(skb); 2155
2019 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) 2156 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2157 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2158 cnt += tcp_skb_pcount(skb);
2159
2160 if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
2161 after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2020 break; 2162 break;
2021 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { 2163 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2022 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 2164 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2029,17 +2171,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2029 2171
2030/* Account newly detected lost packet(s) */ 2172/* Account newly detected lost packet(s) */
2031 2173
2032static void tcp_update_scoreboard(struct sock *sk) 2174static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2033{ 2175{
2034 struct tcp_sock *tp = tcp_sk(sk); 2176 struct tcp_sock *tp = tcp_sk(sk);
2035 2177
2036 if (tcp_is_fack(tp)) { 2178 if (tcp_is_reno(tp)) {
2179 tcp_mark_head_lost(sk, 1, fast_rexmit);
2180 } else if (tcp_is_fack(tp)) {
2037 int lost = tp->fackets_out - tp->reordering; 2181 int lost = tp->fackets_out - tp->reordering;
2038 if (lost <= 0) 2182 if (lost <= 0)
2039 lost = 1; 2183 lost = 1;
2040 tcp_mark_head_lost(sk, lost); 2184 tcp_mark_head_lost(sk, lost, fast_rexmit);
2041 } else { 2185 } else {
2042 tcp_mark_head_lost(sk, 1); 2186 int sacked_upto = tp->sacked_out - tp->reordering;
2187 if (sacked_upto < 0)
2188 sacked_upto = 0;
2189 tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
2043 } 2190 }
2044 2191
2045 /* New heuristics: it is possible only after we switched 2192 /* New heuristics: it is possible only after we switched
@@ -2047,7 +2194,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2047 * Hence, we can detect timed out packets during fast 2194 * Hence, we can detect timed out packets during fast
2048 * retransmit without falling to slow start. 2195 * retransmit without falling to slow start.
2049 */ 2196 */
2050 if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { 2197 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
2051 struct sk_buff *skb; 2198 struct sk_buff *skb;
2052 2199
2053 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint 2200 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -2059,7 +2206,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2059 if (!tcp_skb_timedout(sk, skb)) 2206 if (!tcp_skb_timedout(sk, skb))
2060 break; 2207 break;
2061 2208
2062 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 2209 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2063 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 2210 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2064 tp->lost_out += tcp_skb_pcount(skb); 2211 tp->lost_out += tcp_skb_pcount(skb);
2065 tcp_verify_retransmit_hint(tp, skb); 2212 tcp_verify_retransmit_hint(tp, skb);
@@ -2078,7 +2225,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2078static inline void tcp_moderate_cwnd(struct tcp_sock *tp) 2225static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2079{ 2226{
2080 tp->snd_cwnd = min(tp->snd_cwnd, 2227 tp->snd_cwnd = min(tp->snd_cwnd,
2081 tcp_packets_in_flight(tp)+tcp_max_burst(tp)); 2228 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2082 tp->snd_cwnd_stamp = tcp_time_stamp; 2229 tp->snd_cwnd_stamp = tcp_time_stamp;
2083} 2230}
2084 2231
@@ -2098,15 +2245,15 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
2098 struct tcp_sock *tp = tcp_sk(sk); 2245 struct tcp_sock *tp = tcp_sk(sk);
2099 int decr = tp->snd_cwnd_cnt + 1; 2246 int decr = tp->snd_cwnd_cnt + 1;
2100 2247
2101 if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || 2248 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2102 (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { 2249 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2103 tp->snd_cwnd_cnt = decr&1; 2250 tp->snd_cwnd_cnt = decr & 1;
2104 decr >>= 1; 2251 decr >>= 1;
2105 2252
2106 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) 2253 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2107 tp->snd_cwnd -= decr; 2254 tp->snd_cwnd -= decr;
2108 2255
2109 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); 2256 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2110 tp->snd_cwnd_stamp = tcp_time_stamp; 2257 tp->snd_cwnd_stamp = tcp_time_stamp;
2111 } 2258 }
2112} 2259}
@@ -2150,7 +2297,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2150 if (icsk->icsk_ca_ops->undo_cwnd) 2297 if (icsk->icsk_ca_ops->undo_cwnd)
2151 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); 2298 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2152 else 2299 else
2153 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); 2300 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2154 2301
2155 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { 2302 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2156 tp->snd_ssthresh = tp->prior_ssthresh; 2303 tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2169,8 +2316,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2169 2316
2170static inline int tcp_may_undo(struct tcp_sock *tp) 2317static inline int tcp_may_undo(struct tcp_sock *tp)
2171{ 2318{
2172 return tp->undo_marker && 2319 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2173 (!tp->undo_retrans || tcp_packet_delayed(tp));
2174} 2320}
2175 2321
2176/* People celebrate: "We love our President!" */ 2322/* People celebrate: "We love our President!" */
@@ -2220,7 +2366,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2220{ 2366{
2221 struct tcp_sock *tp = tcp_sk(sk); 2367 struct tcp_sock *tp = tcp_sk(sk);
2222 /* Partial ACK arrived. Force Hoe's retransmit. */ 2368 /* Partial ACK arrived. Force Hoe's retransmit. */
2223 int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; 2369 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2224 2370
2225 if (tcp_may_undo(tp)) { 2371 if (tcp_may_undo(tp)) {
2226 /* Plain luck! Hole if filled with delayed 2372 /* Plain luck! Hole if filled with delayed
@@ -2289,7 +2435,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2289 if (tp->retrans_out == 0) 2435 if (tp->retrans_out == 0)
2290 tp->retrans_stamp = 0; 2436 tp->retrans_stamp = 0;
2291 2437
2292 if (flag&FLAG_ECE) 2438 if (flag & FLAG_ECE)
2293 tcp_enter_cwr(sk, 1); 2439 tcp_enter_cwr(sk, 1);
2294 2440
2295 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2441 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
@@ -2335,7 +2481,6 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
2335 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 2481 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2336} 2482}
2337 2483
2338
2339/* Process an event, which can update packets-in-flight not trivially. 2484/* Process an event, which can update packets-in-flight not trivially.
2340 * Main goal of this function is to calculate new estimate for left_out, 2485 * Main goal of this function is to calculate new estimate for left_out,
2341 * taking into account both packets sitting in receiver's buffer and 2486 * taking into account both packets sitting in receiver's buffer and
@@ -2347,38 +2492,35 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
2347 * It does _not_ decide what to send, it is made in function 2492 * It does _not_ decide what to send, it is made in function
2348 * tcp_xmit_retransmit_queue(). 2493 * tcp_xmit_retransmit_queue().
2349 */ 2494 */
2350static void 2495static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2351tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2352{ 2496{
2353 struct inet_connection_sock *icsk = inet_csk(sk); 2497 struct inet_connection_sock *icsk = inet_csk(sk);
2354 struct tcp_sock *tp = tcp_sk(sk); 2498 struct tcp_sock *tp = tcp_sk(sk);
2355 int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); 2499 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2356 int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && 2500 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2357 (tp->fackets_out > tp->reordering)); 2501 (tcp_fackets_out(tp) > tp->reordering));
2502 int fast_rexmit = 0;
2358 2503
2359 /* Some technical things: 2504 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2360 * 1. Reno does not count dupacks (sacked_out) automatically. */
2361 if (!tp->packets_out)
2362 tp->sacked_out = 0; 2505 tp->sacked_out = 0;
2363
2364 if (WARN_ON(!tp->sacked_out && tp->fackets_out)) 2506 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2365 tp->fackets_out = 0; 2507 tp->fackets_out = 0;
2366 2508
2367 /* Now state machine starts. 2509 /* Now state machine starts.
2368 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ 2510 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2369 if (flag&FLAG_ECE) 2511 if (flag & FLAG_ECE)
2370 tp->prior_ssthresh = 0; 2512 tp->prior_ssthresh = 0;
2371 2513
2372 /* B. In all the states check for reneging SACKs. */ 2514 /* B. In all the states check for reneging SACKs. */
2373 if (tp->sacked_out && tcp_check_sack_reneging(sk)) 2515 if (tcp_check_sack_reneging(sk, flag))
2374 return; 2516 return;
2375 2517
2376 /* C. Process data loss notification, provided it is valid. */ 2518 /* C. Process data loss notification, provided it is valid. */
2377 if ((flag&FLAG_DATA_LOST) && 2519 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2378 before(tp->snd_una, tp->high_seq) && 2520 before(tp->snd_una, tp->high_seq) &&
2379 icsk->icsk_ca_state != TCP_CA_Open && 2521 icsk->icsk_ca_state != TCP_CA_Open &&
2380 tp->fackets_out > tp->reordering) { 2522 tp->fackets_out > tp->reordering) {
2381 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); 2523 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
2382 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 2524 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
2383 } 2525 }
2384 2526
@@ -2438,7 +2580,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2438 do_lost = tcp_try_undo_partial(sk, pkts_acked); 2580 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2439 break; 2581 break;
2440 case TCP_CA_Loss: 2582 case TCP_CA_Loss:
2441 if (flag&FLAG_DATA_ACKED) 2583 if (flag & FLAG_DATA_ACKED)
2442 icsk->icsk_retransmits = 0; 2584 icsk->icsk_retransmits = 0;
2443 if (!tcp_try_undo_loss(sk)) { 2585 if (!tcp_try_undo_loss(sk)) {
2444 tcp_moderate_cwnd(tp); 2586 tcp_moderate_cwnd(tp);
@@ -2488,7 +2630,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2488 tp->undo_retrans = tp->retrans_out; 2630 tp->undo_retrans = tp->retrans_out;
2489 2631
2490 if (icsk->icsk_ca_state < TCP_CA_CWR) { 2632 if (icsk->icsk_ca_state < TCP_CA_CWR) {
2491 if (!(flag&FLAG_ECE)) 2633 if (!(flag & FLAG_ECE))
2492 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2634 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2493 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 2635 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2494 TCP_ECN_queue_cwr(tp); 2636 TCP_ECN_queue_cwr(tp);
@@ -2497,10 +2639,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2497 tp->bytes_acked = 0; 2639 tp->bytes_acked = 0;
2498 tp->snd_cwnd_cnt = 0; 2640 tp->snd_cwnd_cnt = 0;
2499 tcp_set_ca_state(sk, TCP_CA_Recovery); 2641 tcp_set_ca_state(sk, TCP_CA_Recovery);
2642 fast_rexmit = 1;
2500 } 2643 }
2501 2644
2502 if (do_lost || tcp_head_timedout(sk)) 2645 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
2503 tcp_update_scoreboard(sk); 2646 tcp_update_scoreboard(sk, fast_rexmit);
2504 tcp_cwnd_down(sk, flag); 2647 tcp_cwnd_down(sk, flag);
2505 tcp_xmit_retransmit_queue(sk); 2648 tcp_xmit_retransmit_queue(sk);
2506} 2649}
@@ -2564,11 +2707,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2564 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2707 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2565} 2708}
2566 2709
2567static void tcp_cong_avoid(struct sock *sk, u32 ack, 2710static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
2568 u32 in_flight, int good)
2569{ 2711{
2570 const struct inet_connection_sock *icsk = inet_csk(sk); 2712 const struct inet_connection_sock *icsk = inet_csk(sk);
2571 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); 2713 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
2572 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; 2714 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2573} 2715}
2574 2716
@@ -2582,7 +2724,8 @@ static void tcp_rearm_rto(struct sock *sk)
2582 if (!tp->packets_out) { 2724 if (!tp->packets_out) {
2583 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2725 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2584 } else { 2726 } else {
2585 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2727 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2728 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2586 } 2729 }
2587} 2730}
2588 2731
@@ -2611,7 +2754,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2611 * is before the ack sequence we can discard it as it's confirmed to have 2754 * is before the ack sequence we can discard it as it's confirmed to have
2612 * arrived at the other end. 2755 * arrived at the other end.
2613 */ 2756 */
2614static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) 2757static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
2615{ 2758{
2616 struct tcp_sock *tp = tcp_sk(sk); 2759 struct tcp_sock *tp = tcp_sk(sk);
2617 const struct inet_connection_sock *icsk = inet_csk(sk); 2760 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2619,29 +2762,32 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
2619 u32 now = tcp_time_stamp; 2762 u32 now = tcp_time_stamp;
2620 int fully_acked = 1; 2763 int fully_acked = 1;
2621 int flag = 0; 2764 int flag = 0;
2622 int prior_packets = tp->packets_out; 2765 u32 pkts_acked = 0;
2766 u32 reord = tp->packets_out;
2623 s32 seq_rtt = -1; 2767 s32 seq_rtt = -1;
2768 s32 ca_seq_rtt = -1;
2624 ktime_t last_ackt = net_invalid_timestamp(); 2769 ktime_t last_ackt = net_invalid_timestamp();
2625 2770
2626 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 2771 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
2627 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 2772 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
2628 u32 end_seq; 2773 u32 end_seq;
2629 u32 packets_acked; 2774 u32 acked_pcount;
2630 u8 sacked = scb->sacked; 2775 u8 sacked = scb->sacked;
2631 2776
2777 /* Determine how many packets and what bytes were acked, tso and else */
2632 if (after(scb->end_seq, tp->snd_una)) { 2778 if (after(scb->end_seq, tp->snd_una)) {
2633 if (tcp_skb_pcount(skb) == 1 || 2779 if (tcp_skb_pcount(skb) == 1 ||
2634 !after(tp->snd_una, scb->seq)) 2780 !after(tp->snd_una, scb->seq))
2635 break; 2781 break;
2636 2782
2637 packets_acked = tcp_tso_acked(sk, skb); 2783 acked_pcount = tcp_tso_acked(sk, skb);
2638 if (!packets_acked) 2784 if (!acked_pcount)
2639 break; 2785 break;
2640 2786
2641 fully_acked = 0; 2787 fully_acked = 0;
2642 end_seq = tp->snd_una; 2788 end_seq = tp->snd_una;
2643 } else { 2789 } else {
2644 packets_acked = tcp_skb_pcount(skb); 2790 acked_pcount = tcp_skb_pcount(skb);
2645 end_seq = scb->end_seq; 2791 end_seq = scb->end_seq;
2646 } 2792 }
2647 2793
@@ -2651,35 +2797,34 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
2651 tcp_mtup_probe_success(sk, skb); 2797 tcp_mtup_probe_success(sk, skb);
2652 } 2798 }
2653 2799
2654 if (sacked) { 2800 if (sacked & TCPCB_RETRANS) {
2655 if (sacked & TCPCB_RETRANS) { 2801 if (sacked & TCPCB_SACKED_RETRANS)
2656 if (sacked & TCPCB_SACKED_RETRANS) 2802 tp->retrans_out -= acked_pcount;
2657 tp->retrans_out -= packets_acked; 2803 flag |= FLAG_RETRANS_DATA_ACKED;
2658 flag |= FLAG_RETRANS_DATA_ACKED; 2804 ca_seq_rtt = -1;
2659 seq_rtt = -1; 2805 seq_rtt = -1;
2660 if ((flag & FLAG_DATA_ACKED) || 2806 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
2661 (packets_acked > 1)) 2807 flag |= FLAG_NONHEAD_RETRANS_ACKED;
2662 flag |= FLAG_NONHEAD_RETRANS_ACKED; 2808 } else {
2663 } else if (seq_rtt < 0) { 2809 ca_seq_rtt = now - scb->when;
2664 seq_rtt = now - scb->when; 2810 last_ackt = skb->tstamp;
2665 if (fully_acked) 2811 if (seq_rtt < 0) {
2666 last_ackt = skb->tstamp; 2812 seq_rtt = ca_seq_rtt;
2667 } 2813 }
2668 2814 if (!(sacked & TCPCB_SACKED_ACKED))
2669 if (sacked & TCPCB_SACKED_ACKED) 2815 reord = min(pkts_acked, reord);
2670 tp->sacked_out -= packets_acked;
2671 if (sacked & TCPCB_LOST)
2672 tp->lost_out -= packets_acked;
2673
2674 if ((sacked & TCPCB_URG) && tp->urg_mode &&
2675 !before(end_seq, tp->snd_up))
2676 tp->urg_mode = 0;
2677 } else if (seq_rtt < 0) {
2678 seq_rtt = now - scb->when;
2679 if (fully_acked)
2680 last_ackt = skb->tstamp;
2681 } 2816 }
2682 tp->packets_out -= packets_acked; 2817
2818 if (sacked & TCPCB_SACKED_ACKED)
2819 tp->sacked_out -= acked_pcount;
2820 if (sacked & TCPCB_LOST)
2821 tp->lost_out -= acked_pcount;
2822
2823 if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
2824 tp->urg_mode = 0;
2825
2826 tp->packets_out -= acked_pcount;
2827 pkts_acked += acked_pcount;
2683 2828
2684 /* Initial outgoing SYN's get put onto the write_queue 2829 /* Initial outgoing SYN's get put onto the write_queue
2685 * just like anything else we transmit. It is not 2830 * just like anything else we transmit. It is not
@@ -2699,24 +2844,29 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
2699 break; 2844 break;
2700 2845
2701 tcp_unlink_write_queue(skb, sk); 2846 tcp_unlink_write_queue(skb, sk);
2702 sk_stream_free_skb(sk, skb); 2847 sk_wmem_free_skb(sk, skb);
2703 tcp_clear_all_retrans_hints(tp); 2848 tcp_clear_all_retrans_hints(tp);
2704 } 2849 }
2705 2850
2851 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2852 flag |= FLAG_SACK_RENEGING;
2853
2706 if (flag & FLAG_ACKED) { 2854 if (flag & FLAG_ACKED) {
2707 u32 pkts_acked = prior_packets - tp->packets_out;
2708 const struct tcp_congestion_ops *ca_ops 2855 const struct tcp_congestion_ops *ca_ops
2709 = inet_csk(sk)->icsk_ca_ops; 2856 = inet_csk(sk)->icsk_ca_ops;
2710 2857
2711 tcp_ack_update_rtt(sk, flag, seq_rtt); 2858 tcp_ack_update_rtt(sk, flag, seq_rtt);
2712 tcp_rearm_rto(sk); 2859 tcp_rearm_rto(sk);
2713 2860
2714 tp->fackets_out -= min(pkts_acked, tp->fackets_out); 2861 if (tcp_is_reno(tp)) {
2715 /* hint's skb might be NULL but we don't need to care */
2716 tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
2717 tp->fastpath_cnt_hint);
2718 if (tcp_is_reno(tp))
2719 tcp_remove_reno_sacks(sk, pkts_acked); 2862 tcp_remove_reno_sacks(sk, pkts_acked);
2863 } else {
2864 /* Non-retransmitted hole got filled? That's reordering */
2865 if (reord < prior_fackets)
2866 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
2867 }
2868
2869 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
2720 2870
2721 if (ca_ops->pkts_acked) { 2871 if (ca_ops->pkts_acked) {
2722 s32 rtt_us = -1; 2872 s32 rtt_us = -1;
@@ -2729,8 +2879,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
2729 net_invalid_timestamp())) 2879 net_invalid_timestamp()))
2730 rtt_us = ktime_us_delta(ktime_get_real(), 2880 rtt_us = ktime_us_delta(ktime_get_real(),
2731 last_ackt); 2881 last_ackt);
2732 else if (seq_rtt > 0) 2882 else if (ca_seq_rtt > 0)
2733 rtt_us = jiffies_to_usecs(seq_rtt); 2883 rtt_us = jiffies_to_usecs(ca_seq_rtt);
2734 } 2884 }
2735 2885
2736 ca_ops->pkts_acked(sk, pkts_acked, rtt_us); 2886 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
@@ -2760,7 +2910,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
2760 } 2910 }
2761 } 2911 }
2762#endif 2912#endif
2763 *seq_rtt_p = seq_rtt;
2764 return flag; 2913 return flag;
2765} 2914}
2766 2915
@@ -2771,8 +2920,7 @@ static void tcp_ack_probe(struct sock *sk)
2771 2920
2772 /* Was it a usable window open? */ 2921 /* Was it a usable window open? */
2773 2922
2774 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, 2923 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
2775 tp->snd_una + tp->snd_wnd)) {
2776 icsk->icsk_backoff = 0; 2924 icsk->icsk_backoff = 0;
2777 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); 2925 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
2778 /* Socket must be waked up by subsequent tcp_data_snd_check(). 2926 /* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -2801,8 +2949,9 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
2801/* Check that window update is acceptable. 2949/* Check that window update is acceptable.
2802 * The function assumes that snd_una<=ack<=snd_next. 2950 * The function assumes that snd_una<=ack<=snd_next.
2803 */ 2951 */
2804static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, 2952static inline int tcp_may_update_window(const struct tcp_sock *tp,
2805 const u32 ack_seq, const u32 nwin) 2953 const u32 ack, const u32 ack_seq,
2954 const u32 nwin)
2806{ 2955{
2807 return (after(ack, tp->snd_una) || 2956 return (after(ack, tp->snd_una) ||
2808 after(ack_seq, tp->snd_wl1) || 2957 after(ack_seq, tp->snd_wl1) ||
@@ -2871,7 +3020,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
2871 3020
2872static void tcp_undo_spur_to_response(struct sock *sk, int flag) 3021static void tcp_undo_spur_to_response(struct sock *sk, int flag)
2873{ 3022{
2874 if (flag&FLAG_ECE) 3023 if (flag & FLAG_ECE)
2875 tcp_ratehalving_spur_to_response(sk); 3024 tcp_ratehalving_spur_to_response(sk);
2876 else 3025 else
2877 tcp_undo_cwr(sk, 1); 3026 tcp_undo_cwr(sk, 1);
@@ -2914,7 +3063,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
2914 tcp_verify_left_out(tp); 3063 tcp_verify_left_out(tp);
2915 3064
2916 /* Duplicate the behavior from Loss state (fastretrans_alert) */ 3065 /* Duplicate the behavior from Loss state (fastretrans_alert) */
2917 if (flag&FLAG_DATA_ACKED) 3066 if (flag & FLAG_DATA_ACKED)
2918 inet_csk(sk)->icsk_retransmits = 0; 3067 inet_csk(sk)->icsk_retransmits = 0;
2919 3068
2920 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || 3069 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
@@ -2931,16 +3080,16 @@ static int tcp_process_frto(struct sock *sk, int flag)
2931 * ACK isn't duplicate nor advances window, e.g., opposite dir 3080 * ACK isn't duplicate nor advances window, e.g., opposite dir
2932 * data, winupdate 3081 * data, winupdate
2933 */ 3082 */
2934 if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP)) 3083 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
2935 return 1; 3084 return 1;
2936 3085
2937 if (!(flag&FLAG_DATA_ACKED)) { 3086 if (!(flag & FLAG_DATA_ACKED)) {
2938 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), 3087 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
2939 flag); 3088 flag);
2940 return 1; 3089 return 1;
2941 } 3090 }
2942 } else { 3091 } else {
2943 if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { 3092 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
2944 /* Prevent sending of new data. */ 3093 /* Prevent sending of new data. */
2945 tp->snd_cwnd = min(tp->snd_cwnd, 3094 tp->snd_cwnd = min(tp->snd_cwnd,
2946 tcp_packets_in_flight(tp)); 3095 tcp_packets_in_flight(tp));
@@ -2948,10 +3097,12 @@ static int tcp_process_frto(struct sock *sk, int flag)
2948 } 3097 }
2949 3098
2950 if ((tp->frto_counter >= 2) && 3099 if ((tp->frto_counter >= 2) &&
2951 (!(flag&FLAG_FORWARD_PROGRESS) || 3100 (!(flag & FLAG_FORWARD_PROGRESS) ||
2952 ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { 3101 ((flag & FLAG_DATA_SACKED) &&
3102 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
2953 /* RFC4138 shortcoming (see comment above) */ 3103 /* RFC4138 shortcoming (see comment above) */
2954 if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP)) 3104 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3105 (flag & FLAG_NOT_DUP))
2955 return 1; 3106 return 1;
2956 3107
2957 tcp_enter_frto_loss(sk, 3, flag); 3108 tcp_enter_frto_loss(sk, 3, flag);
@@ -2960,17 +3111,13 @@ static int tcp_process_frto(struct sock *sk, int flag)
2960 } 3111 }
2961 3112
2962 if (tp->frto_counter == 1) { 3113 if (tp->frto_counter == 1) {
2963 /* Sending of the next skb must be allowed or no F-RTO */ 3114 /* tcp_may_send_now needs to see updated state */
2964 if (!tcp_send_head(sk) ||
2965 after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
2966 tp->snd_una + tp->snd_wnd)) {
2967 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
2968 flag);
2969 return 1;
2970 }
2971
2972 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; 3115 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
2973 tp->frto_counter = 2; 3116 tp->frto_counter = 2;
3117
3118 if (!tcp_may_send_now(sk))
3119 tcp_enter_frto_loss(sk, 2, flag);
3120
2974 return 1; 3121 return 1;
2975 } else { 3122 } else {
2976 switch (sysctl_tcp_frto_response) { 3123 switch (sysctl_tcp_frto_response) {
@@ -3000,7 +3147,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3000 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3147 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3001 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3148 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3002 u32 prior_in_flight; 3149 u32 prior_in_flight;
3003 s32 seq_rtt; 3150 u32 prior_fackets;
3004 int prior_packets; 3151 int prior_packets;
3005 int frto_cwnd = 0; 3152 int frto_cwnd = 0;
3006 3153
@@ -3021,10 +3168,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3021 tp->bytes_acked += ack - prior_snd_una; 3168 tp->bytes_acked += ack - prior_snd_una;
3022 else if (icsk->icsk_ca_state == TCP_CA_Loss) 3169 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3023 /* we assume just one segment left network */ 3170 /* we assume just one segment left network */
3024 tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); 3171 tp->bytes_acked += min(ack - prior_snd_una,
3172 tp->mss_cache);
3025 } 3173 }
3026 3174
3027 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 3175 prior_fackets = tp->fackets_out;
3176 prior_in_flight = tcp_packets_in_flight(tp);
3177
3178 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3028 /* Window is constant, pure forward advance. 3179 /* Window is constant, pure forward advance.
3029 * No more checks are required. 3180 * No more checks are required.
3030 * Note, we use the fact that SND.UNA>=SND.WL2. 3181 * Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3062,29 +3213,28 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3062 if (!prior_packets) 3213 if (!prior_packets)
3063 goto no_queue; 3214 goto no_queue;
3064 3215
3065 prior_in_flight = tcp_packets_in_flight(tp);
3066
3067 /* See if we can take anything off of the retransmit queue. */ 3216 /* See if we can take anything off of the retransmit queue. */
3068 flag |= tcp_clean_rtx_queue(sk, &seq_rtt); 3217 flag |= tcp_clean_rtx_queue(sk, prior_fackets);
3069 3218
3219 if (tp->frto_counter)
3220 frto_cwnd = tcp_process_frto(sk, flag);
3070 /* Guarantee sacktag reordering detection against wrap-arounds */ 3221 /* Guarantee sacktag reordering detection against wrap-arounds */
3071 if (before(tp->frto_highmark, tp->snd_una)) 3222 if (before(tp->frto_highmark, tp->snd_una))
3072 tp->frto_highmark = 0; 3223 tp->frto_highmark = 0;
3073 if (tp->frto_counter)
3074 frto_cwnd = tcp_process_frto(sk, flag);
3075 3224
3076 if (tcp_ack_is_dubious(sk, flag)) { 3225 if (tcp_ack_is_dubious(sk, flag)) {
3077 /* Advance CWND, if state allows this. */ 3226 /* Advance CWND, if state allows this. */
3078 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3227 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3079 tcp_may_raise_cwnd(sk, flag)) 3228 tcp_may_raise_cwnd(sk, flag))
3080 tcp_cong_avoid(sk, ack, prior_in_flight, 0); 3229 tcp_cong_avoid(sk, ack, prior_in_flight);
3081 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); 3230 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3231 flag);
3082 } else { 3232 } else {
3083 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3233 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3084 tcp_cong_avoid(sk, ack, prior_in_flight, 1); 3234 tcp_cong_avoid(sk, ack, prior_in_flight);
3085 } 3235 }
3086 3236
3087 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) 3237 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3088 dst_confirm(sk->sk_dst_cache); 3238 dst_confirm(sk->sk_dst_cache);
3089 3239
3090 return 1; 3240 return 1;
@@ -3109,100 +3259,99 @@ uninteresting_ack:
3109 return 0; 3259 return 0;
3110} 3260}
3111 3261
3112
3113/* Look for tcp options. Normally only called on SYN and SYNACK packets. 3262/* Look for tcp options. Normally only called on SYN and SYNACK packets.
3114 * But, this can also be called on packets in the established flow when 3263 * But, this can also be called on packets in the established flow when
3115 * the fast version below fails. 3264 * the fast version below fails.
3116 */ 3265 */
3117void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) 3266void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3267 int estab)
3118{ 3268{
3119 unsigned char *ptr; 3269 unsigned char *ptr;
3120 struct tcphdr *th = tcp_hdr(skb); 3270 struct tcphdr *th = tcp_hdr(skb);
3121 int length=(th->doff*4)-sizeof(struct tcphdr); 3271 int length = (th->doff * 4) - sizeof(struct tcphdr);
3122 3272
3123 ptr = (unsigned char *)(th + 1); 3273 ptr = (unsigned char *)(th + 1);
3124 opt_rx->saw_tstamp = 0; 3274 opt_rx->saw_tstamp = 0;
3125 3275
3126 while (length > 0) { 3276 while (length > 0) {
3127 int opcode=*ptr++; 3277 int opcode = *ptr++;
3128 int opsize; 3278 int opsize;
3129 3279
3130 switch (opcode) { 3280 switch (opcode) {
3131 case TCPOPT_EOL: 3281 case TCPOPT_EOL:
3282 return;
3283 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
3284 length--;
3285 continue;
3286 default:
3287 opsize = *ptr++;
3288 if (opsize < 2) /* "silly options" */
3132 return; 3289 return;
3133 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 3290 if (opsize > length)
3134 length--; 3291 return; /* don't parse partial options */
3135 continue; 3292 switch (opcode) {
3136 default: 3293 case TCPOPT_MSS:
3137 opsize=*ptr++; 3294 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3138 if (opsize < 2) /* "silly options" */ 3295 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
3139 return; 3296 if (in_mss) {
3140 if (opsize > length) 3297 if (opt_rx->user_mss &&
3141 return; /* don't parse partial options */ 3298 opt_rx->user_mss < in_mss)
3142 switch (opcode) { 3299 in_mss = opt_rx->user_mss;
3143 case TCPOPT_MSS: 3300 opt_rx->mss_clamp = in_mss;
3144 if (opsize==TCPOLEN_MSS && th->syn && !estab) {
3145 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
3146 if (in_mss) {
3147 if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
3148 in_mss = opt_rx->user_mss;
3149 opt_rx->mss_clamp = in_mss;
3150 }
3151 } 3301 }
3152 break; 3302 }
3153 case TCPOPT_WINDOW: 3303 break;
3154 if (opsize==TCPOLEN_WINDOW && th->syn && !estab) 3304 case TCPOPT_WINDOW:
3155 if (sysctl_tcp_window_scaling) { 3305 if (opsize == TCPOLEN_WINDOW && th->syn &&
3156 __u8 snd_wscale = *(__u8 *) ptr; 3306 !estab && sysctl_tcp_window_scaling) {
3157 opt_rx->wscale_ok = 1; 3307 __u8 snd_wscale = *(__u8 *)ptr;
3158 if (snd_wscale > 14) { 3308 opt_rx->wscale_ok = 1;
3159 if (net_ratelimit()) 3309 if (snd_wscale > 14) {
3160 printk(KERN_INFO "tcp_parse_options: Illegal window " 3310 if (net_ratelimit())
3161 "scaling value %d >14 received.\n", 3311 printk(KERN_INFO "tcp_parse_options: Illegal window "
3162 snd_wscale); 3312 "scaling value %d >14 received.\n",
3163 snd_wscale = 14; 3313 snd_wscale);
3164 } 3314 snd_wscale = 14;
3165 opt_rx->snd_wscale = snd_wscale;
3166 }
3167 break;
3168 case TCPOPT_TIMESTAMP:
3169 if (opsize==TCPOLEN_TIMESTAMP) {
3170 if ((estab && opt_rx->tstamp_ok) ||
3171 (!estab && sysctl_tcp_timestamps)) {
3172 opt_rx->saw_tstamp = 1;
3173 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
3174 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
3175 }
3176 }
3177 break;
3178 case TCPOPT_SACK_PERM:
3179 if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
3180 if (sysctl_tcp_sack) {
3181 opt_rx->sack_ok = 1;
3182 tcp_sack_reset(opt_rx);
3183 }
3184 } 3315 }
3185 break; 3316 opt_rx->snd_wscale = snd_wscale;
3317 }
3318 break;
3319 case TCPOPT_TIMESTAMP:
3320 if ((opsize == TCPOLEN_TIMESTAMP) &&
3321 ((estab && opt_rx->tstamp_ok) ||
3322 (!estab && sysctl_tcp_timestamps))) {
3323 opt_rx->saw_tstamp = 1;
3324 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
3325 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
3326 }
3327 break;
3328 case TCPOPT_SACK_PERM:
3329 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3330 !estab && sysctl_tcp_sack) {
3331 opt_rx->sack_ok = 1;
3332 tcp_sack_reset(opt_rx);
3333 }
3334 break;
3186 3335
3187 case TCPOPT_SACK: 3336 case TCPOPT_SACK:
3188 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && 3337 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3189 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && 3338 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3190 opt_rx->sack_ok) { 3339 opt_rx->sack_ok) {
3191 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; 3340 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3192 } 3341 }
3193 break; 3342 break;
3194#ifdef CONFIG_TCP_MD5SIG 3343#ifdef CONFIG_TCP_MD5SIG
3195 case TCPOPT_MD5SIG: 3344 case TCPOPT_MD5SIG:
3196 /* 3345 /*
3197 * The MD5 Hash has already been 3346 * The MD5 Hash has already been
3198 * checked (see tcp_v{4,6}_do_rcv()). 3347 * checked (see tcp_v{4,6}_do_rcv()).
3199 */ 3348 */
3200 break; 3349 break;
3201#endif 3350#endif
3202 } 3351 }
3203 3352
3204 ptr+=opsize-2; 3353 ptr += opsize-2;
3205 length-=opsize; 3354 length -= opsize;
3206 } 3355 }
3207 } 3356 }
3208} 3357}
@@ -3213,7 +3362,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3213static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 3362static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3214 struct tcp_sock *tp) 3363 struct tcp_sock *tp)
3215{ 3364{
3216 if (th->doff == sizeof(struct tcphdr)>>2) { 3365 if (th->doff == sizeof(struct tcphdr) >> 2) {
3217 tp->rx_opt.saw_tstamp = 0; 3366 tp->rx_opt.saw_tstamp = 0;
3218 return 0; 3367 return 0;
3219 } else if (tp->rx_opt.tstamp_ok && 3368 } else if (tp->rx_opt.tstamp_ok &&
@@ -3298,7 +3447,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3298 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); 3447 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3299} 3448}
3300 3449
3301static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) 3450static inline int tcp_paws_discard(const struct sock *sk,
3451 const struct sk_buff *skb)
3302{ 3452{
3303 const struct tcp_sock *tp = tcp_sk(sk); 3453 const struct tcp_sock *tp = tcp_sk(sk);
3304 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && 3454 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
@@ -3330,16 +3480,16 @@ static void tcp_reset(struct sock *sk)
3330{ 3480{
3331 /* We want the right error as BSD sees it (and indeed as we do). */ 3481 /* We want the right error as BSD sees it (and indeed as we do). */
3332 switch (sk->sk_state) { 3482 switch (sk->sk_state) {
3333 case TCP_SYN_SENT: 3483 case TCP_SYN_SENT:
3334 sk->sk_err = ECONNREFUSED; 3484 sk->sk_err = ECONNREFUSED;
3335 break; 3485 break;
3336 case TCP_CLOSE_WAIT: 3486 case TCP_CLOSE_WAIT:
3337 sk->sk_err = EPIPE; 3487 sk->sk_err = EPIPE;
3338 break; 3488 break;
3339 case TCP_CLOSE: 3489 case TCP_CLOSE:
3340 return; 3490 return;
3341 default: 3491 default:
3342 sk->sk_err = ECONNRESET; 3492 sk->sk_err = ECONNRESET;
3343 } 3493 }
3344 3494
3345 if (!sock_flag(sk, SOCK_DEAD)) 3495 if (!sock_flag(sk, SOCK_DEAD))
@@ -3372,43 +3522,43 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3372 sock_set_flag(sk, SOCK_DONE); 3522 sock_set_flag(sk, SOCK_DONE);
3373 3523
3374 switch (sk->sk_state) { 3524 switch (sk->sk_state) {
3375 case TCP_SYN_RECV: 3525 case TCP_SYN_RECV:
3376 case TCP_ESTABLISHED: 3526 case TCP_ESTABLISHED:
3377 /* Move to CLOSE_WAIT */ 3527 /* Move to CLOSE_WAIT */
3378 tcp_set_state(sk, TCP_CLOSE_WAIT); 3528 tcp_set_state(sk, TCP_CLOSE_WAIT);
3379 inet_csk(sk)->icsk_ack.pingpong = 1; 3529 inet_csk(sk)->icsk_ack.pingpong = 1;
3380 break; 3530 break;
3381 3531
3382 case TCP_CLOSE_WAIT: 3532 case TCP_CLOSE_WAIT:
3383 case TCP_CLOSING: 3533 case TCP_CLOSING:
3384 /* Received a retransmission of the FIN, do 3534 /* Received a retransmission of the FIN, do
3385 * nothing. 3535 * nothing.
3386 */ 3536 */
3387 break; 3537 break;
3388 case TCP_LAST_ACK: 3538 case TCP_LAST_ACK:
3389 /* RFC793: Remain in the LAST-ACK state. */ 3539 /* RFC793: Remain in the LAST-ACK state. */
3390 break; 3540 break;
3391 3541
3392 case TCP_FIN_WAIT1: 3542 case TCP_FIN_WAIT1:
3393 /* This case occurs when a simultaneous close 3543 /* This case occurs when a simultaneous close
3394 * happens, we must ack the received FIN and 3544 * happens, we must ack the received FIN and
3395 * enter the CLOSING state. 3545 * enter the CLOSING state.
3396 */ 3546 */
3397 tcp_send_ack(sk); 3547 tcp_send_ack(sk);
3398 tcp_set_state(sk, TCP_CLOSING); 3548 tcp_set_state(sk, TCP_CLOSING);
3399 break; 3549 break;
3400 case TCP_FIN_WAIT2: 3550 case TCP_FIN_WAIT2:
3401 /* Received a FIN -- send ACK and enter TIME_WAIT. */ 3551 /* Received a FIN -- send ACK and enter TIME_WAIT. */
3402 tcp_send_ack(sk); 3552 tcp_send_ack(sk);
3403 tcp_time_wait(sk, TCP_TIME_WAIT, 0); 3553 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3404 break; 3554 break;
3405 default: 3555 default:
3406 /* Only TCP_LISTEN and TCP_CLOSE are left, in these 3556 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
3407 * cases we should never reach this piece of code. 3557 * cases we should never reach this piece of code.
3408 */ 3558 */
3409 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", 3559 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
3410 __FUNCTION__, sk->sk_state); 3560 __FUNCTION__, sk->sk_state);
3411 break; 3561 break;
3412 } 3562 }
3413 3563
3414 /* It _is_ possible, that we have something out-of-order _after_ FIN. 3564 /* It _is_ possible, that we have something out-of-order _after_ FIN.
@@ -3417,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3417 __skb_queue_purge(&tp->out_of_order_queue); 3567 __skb_queue_purge(&tp->out_of_order_queue);
3418 if (tcp_is_sack(tp)) 3568 if (tcp_is_sack(tp))
3419 tcp_sack_reset(&tp->rx_opt); 3569 tcp_sack_reset(&tp->rx_opt);
3420 sk_stream_mem_reclaim(sk); 3570 sk_mem_reclaim(sk);
3421 3571
3422 if (!sock_flag(sk, SOCK_DEAD)) { 3572 if (!sock_flag(sk, SOCK_DEAD)) {
3423 sk->sk_state_change(sk); 3573 sk->sk_state_change(sk);
@@ -3425,13 +3575,14 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3425 /* Do not send POLL_HUP for half duplex close. */ 3575 /* Do not send POLL_HUP for half duplex close. */
3426 if (sk->sk_shutdown == SHUTDOWN_MASK || 3576 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3427 sk->sk_state == TCP_CLOSE) 3577 sk->sk_state == TCP_CLOSE)
3428 sk_wake_async(sk, 1, POLL_HUP); 3578 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
3429 else 3579 else
3430 sk_wake_async(sk, 1, POLL_IN); 3580 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3431 } 3581 }
3432} 3582}
3433 3583
3434static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) 3584static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3585 u32 end_seq)
3435{ 3586{
3436 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 3587 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3437 if (before(seq, sp->start_seq)) 3588 if (before(seq, sp->start_seq))
@@ -3454,7 +3605,8 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
3454 tp->rx_opt.dsack = 1; 3605 tp->rx_opt.dsack = 1;
3455 tp->duplicate_sack[0].start_seq = seq; 3606 tp->duplicate_sack[0].start_seq = seq;
3456 tp->duplicate_sack[0].end_seq = end_seq; 3607 tp->duplicate_sack[0].end_seq = end_seq;
3457 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok); 3608 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
3609 4 - tp->rx_opt.tstamp_ok);
3458 } 3610 }
3459} 3611}
3460 3612
@@ -3494,12 +3646,12 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3494{ 3646{
3495 int this_sack; 3647 int this_sack;
3496 struct tcp_sack_block *sp = &tp->selective_acks[0]; 3648 struct tcp_sack_block *sp = &tp->selective_acks[0];
3497 struct tcp_sack_block *swalk = sp+1; 3649 struct tcp_sack_block *swalk = sp + 1;
3498 3650
3499 /* See if the recent change to the first SACK eats into 3651 /* See if the recent change to the first SACK eats into
3500 * or hits the sequence space of other SACK blocks, if so coalesce. 3652 * or hits the sequence space of other SACK blocks, if so coalesce.
3501 */ 3653 */
3502 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) { 3654 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
3503 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) { 3655 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3504 int i; 3656 int i;
3505 3657
@@ -3507,16 +3659,19 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3507 * Decrease num_sacks. 3659 * Decrease num_sacks.
3508 */ 3660 */
3509 tp->rx_opt.num_sacks--; 3661 tp->rx_opt.num_sacks--;
3510 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3662 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
3511 for (i=this_sack; i < tp->rx_opt.num_sacks; i++) 3663 tp->rx_opt.dsack,
3512 sp[i] = sp[i+1]; 3664 4 - tp->rx_opt.tstamp_ok);
3665 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
3666 sp[i] = sp[i + 1];
3513 continue; 3667 continue;
3514 } 3668 }
3515 this_sack++, swalk++; 3669 this_sack++, swalk++;
3516 } 3670 }
3517} 3671}
3518 3672
3519static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) 3673static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
3674 struct tcp_sack_block *sack2)
3520{ 3675{
3521 __u32 tmp; 3676 __u32 tmp;
3522 3677
@@ -3539,11 +3694,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3539 if (!cur_sacks) 3694 if (!cur_sacks)
3540 goto new_sack; 3695 goto new_sack;
3541 3696
3542 for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) { 3697 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
3543 if (tcp_sack_extend(sp, seq, end_seq)) { 3698 if (tcp_sack_extend(sp, seq, end_seq)) {
3544 /* Rotate this_sack to the first one. */ 3699 /* Rotate this_sack to the first one. */
3545 for (; this_sack>0; this_sack--, sp--) 3700 for (; this_sack > 0; this_sack--, sp--)
3546 tcp_sack_swap(sp, sp-1); 3701 tcp_sack_swap(sp, sp - 1);
3547 if (cur_sacks > 1) 3702 if (cur_sacks > 1)
3548 tcp_sack_maybe_coalesce(tp); 3703 tcp_sack_maybe_coalesce(tp);
3549 return; 3704 return;
@@ -3562,14 +3717,15 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3562 sp--; 3717 sp--;
3563 } 3718 }
3564 for (; this_sack > 0; this_sack--, sp--) 3719 for (; this_sack > 0; this_sack--, sp--)
3565 *sp = *(sp-1); 3720 *sp = *(sp - 1);
3566 3721
3567new_sack: 3722new_sack:
3568 /* Build the new head SACK, and we're done. */ 3723 /* Build the new head SACK, and we're done. */
3569 sp->start_seq = seq; 3724 sp->start_seq = seq;
3570 sp->end_seq = end_seq; 3725 sp->end_seq = end_seq;
3571 tp->rx_opt.num_sacks++; 3726 tp->rx_opt.num_sacks++;
3572 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3727 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
3728 4 - tp->rx_opt.tstamp_ok);
3573} 3729}
3574 3730
3575/* RCV.NXT advances, some SACKs should be eaten. */ 3731/* RCV.NXT advances, some SACKs should be eaten. */
@@ -3587,7 +3743,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3587 return; 3743 return;
3588 } 3744 }
3589 3745
3590 for (this_sack = 0; this_sack < num_sacks; ) { 3746 for (this_sack = 0; this_sack < num_sacks;) {
3591 /* Check if the start of the sack is covered by RCV.NXT. */ 3747 /* Check if the start of the sack is covered by RCV.NXT. */
3592 if (!before(tp->rcv_nxt, sp->start_seq)) { 3748 if (!before(tp->rcv_nxt, sp->start_seq)) {
3593 int i; 3749 int i;
@@ -3606,7 +3762,9 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3606 } 3762 }
3607 if (num_sacks != tp->rx_opt.num_sacks) { 3763 if (num_sacks != tp->rx_opt.num_sacks) {
3608 tp->rx_opt.num_sacks = num_sacks; 3764 tp->rx_opt.num_sacks = num_sacks;
3609 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3765 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
3766 tp->rx_opt.dsack,
3767 4 - tp->rx_opt.tstamp_ok);
3610 } 3768 }
3611} 3769}
3612 3770
@@ -3659,14 +3817,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3659 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 3817 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
3660 goto drop; 3818 goto drop;
3661 3819
3662 __skb_pull(skb, th->doff*4); 3820 __skb_pull(skb, th->doff * 4);
3663 3821
3664 TCP_ECN_accept_cwr(tp, skb); 3822 TCP_ECN_accept_cwr(tp, skb);
3665 3823
3666 if (tp->rx_opt.dsack) { 3824 if (tp->rx_opt.dsack) {
3667 tp->rx_opt.dsack = 0; 3825 tp->rx_opt.dsack = 0;
3668 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, 3826 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
3669 4 - tp->rx_opt.tstamp_ok); 3827 4 - tp->rx_opt.tstamp_ok);
3670 } 3828 }
3671 3829
3672 /* Queue data for delivery to the user. 3830 /* Queue data for delivery to the user.
@@ -3682,7 +3840,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3682 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && 3840 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
3683 sock_owned_by_user(sk) && !tp->urg_data) { 3841 sock_owned_by_user(sk) && !tp->urg_data) {
3684 int chunk = min_t(unsigned int, skb->len, 3842 int chunk = min_t(unsigned int, skb->len,
3685 tp->ucopy.len); 3843 tp->ucopy.len);
3686 3844
3687 __set_current_state(TASK_RUNNING); 3845 __set_current_state(TASK_RUNNING);
3688 3846
@@ -3700,12 +3858,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3700queue_and_out: 3858queue_and_out:
3701 if (eaten < 0 && 3859 if (eaten < 0 &&
3702 (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 3860 (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3703 !sk_stream_rmem_schedule(sk, skb))) { 3861 !sk_rmem_schedule(sk, skb->truesize))) {
3704 if (tcp_prune_queue(sk) < 0 || 3862 if (tcp_prune_queue(sk) < 0 ||
3705 !sk_stream_rmem_schedule(sk, skb)) 3863 !sk_rmem_schedule(sk, skb->truesize))
3706 goto drop; 3864 goto drop;
3707 } 3865 }
3708 sk_stream_set_owner_r(skb, sk); 3866 skb_set_owner_r(skb, sk);
3709 __skb_queue_tail(&sk->sk_receive_queue, skb); 3867 __skb_queue_tail(&sk->sk_receive_queue, skb);
3710 } 3868 }
3711 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 3869 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3774,9 +3932,9 @@ drop:
3774 TCP_ECN_check_ce(tp, skb); 3932 TCP_ECN_check_ce(tp, skb);
3775 3933
3776 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 3934 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3777 !sk_stream_rmem_schedule(sk, skb)) { 3935 !sk_rmem_schedule(sk, skb->truesize)) {
3778 if (tcp_prune_queue(sk) < 0 || 3936 if (tcp_prune_queue(sk) < 0 ||
3779 !sk_stream_rmem_schedule(sk, skb)) 3937 !sk_rmem_schedule(sk, skb->truesize))
3780 goto drop; 3938 goto drop;
3781 } 3939 }
3782 3940
@@ -3787,7 +3945,7 @@ drop:
3787 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", 3945 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
3788 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 3946 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
3789 3947
3790 sk_stream_set_owner_r(skb, sk); 3948 skb_set_owner_r(skb, sk);
3791 3949
3792 if (!skb_peek(&tp->out_of_order_queue)) { 3950 if (!skb_peek(&tp->out_of_order_queue)) {
3793 /* Initial out of order segment, build 1 SACK. */ 3951 /* Initial out of order segment, build 1 SACK. */
@@ -3799,7 +3957,7 @@ drop:
3799 tp->selective_acks[0].end_seq = 3957 tp->selective_acks[0].end_seq =
3800 TCP_SKB_CB(skb)->end_seq; 3958 TCP_SKB_CB(skb)->end_seq;
3801 } 3959 }
3802 __skb_queue_head(&tp->out_of_order_queue,skb); 3960 __skb_queue_head(&tp->out_of_order_queue, skb);
3803 } else { 3961 } else {
3804 struct sk_buff *skb1 = tp->out_of_order_queue.prev; 3962 struct sk_buff *skb1 = tp->out_of_order_queue.prev;
3805 u32 seq = TCP_SKB_CB(skb)->seq; 3963 u32 seq = TCP_SKB_CB(skb)->seq;
@@ -3822,10 +3980,10 @@ drop:
3822 if (!after(TCP_SKB_CB(skb1)->seq, seq)) 3980 if (!after(TCP_SKB_CB(skb1)->seq, seq))
3823 break; 3981 break;
3824 } while ((skb1 = skb1->prev) != 3982 } while ((skb1 = skb1->prev) !=
3825 (struct sk_buff*)&tp->out_of_order_queue); 3983 (struct sk_buff *)&tp->out_of_order_queue);
3826 3984
3827 /* Do skb overlap to previous one? */ 3985 /* Do skb overlap to previous one? */
3828 if (skb1 != (struct sk_buff*)&tp->out_of_order_queue && 3986 if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
3829 before(seq, TCP_SKB_CB(skb1)->end_seq)) { 3987 before(seq, TCP_SKB_CB(skb1)->end_seq)) {
3830 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 3988 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3831 /* All the bits are present. Drop. */ 3989 /* All the bits are present. Drop. */
@@ -3835,7 +3993,8 @@ drop:
3835 } 3993 }
3836 if (after(seq, TCP_SKB_CB(skb1)->seq)) { 3994 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
3837 /* Partial overlap. */ 3995 /* Partial overlap. */
3838 tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq); 3996 tcp_dsack_set(tp, seq,
3997 TCP_SKB_CB(skb1)->end_seq);
3839 } else { 3998 } else {
3840 skb1 = skb1->prev; 3999 skb1 = skb1->prev;
3841 } 4000 }
@@ -3844,15 +4003,17 @@ drop:
3844 4003
3845 /* And clean segments covered by new one as whole. */ 4004 /* And clean segments covered by new one as whole. */
3846 while ((skb1 = skb->next) != 4005 while ((skb1 = skb->next) !=
3847 (struct sk_buff*)&tp->out_of_order_queue && 4006 (struct sk_buff *)&tp->out_of_order_queue &&
3848 after(end_seq, TCP_SKB_CB(skb1)->seq)) { 4007 after(end_seq, TCP_SKB_CB(skb1)->seq)) {
3849 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4008 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3850 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); 4009 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
3851 break; 4010 end_seq);
3852 } 4011 break;
3853 __skb_unlink(skb1, &tp->out_of_order_queue); 4012 }
3854 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); 4013 __skb_unlink(skb1, &tp->out_of_order_queue);
3855 __kfree_skb(skb1); 4014 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
4015 TCP_SKB_CB(skb1)->end_seq);
4016 __kfree_skb(skb1);
3856 } 4017 }
3857 4018
3858add_sack: 4019add_sack:
@@ -3875,7 +4036,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3875 4036
3876 /* First, check that queue is collapsible and find 4037 /* First, check that queue is collapsible and find
3877 * the point where collapsing can be useful. */ 4038 * the point where collapsing can be useful. */
3878 for (skb = head; skb != tail; ) { 4039 for (skb = head; skb != tail;) {
3879 /* No new bits? It is possible on ofo queue. */ 4040 /* No new bits? It is possible on ofo queue. */
3880 if (!before(start, TCP_SKB_CB(skb)->end_seq)) { 4041 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3881 struct sk_buff *next = skb->next; 4042 struct sk_buff *next = skb->next;
@@ -3913,9 +4074,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3913 /* Too big header? This can happen with IPv6. */ 4074 /* Too big header? This can happen with IPv6. */
3914 if (copy < 0) 4075 if (copy < 0)
3915 return; 4076 return;
3916 if (end-start < copy) 4077 if (end - start < copy)
3917 copy = end-start; 4078 copy = end - start;
3918 nskb = alloc_skb(copy+header, GFP_ATOMIC); 4079 nskb = alloc_skb(copy + header, GFP_ATOMIC);
3919 if (!nskb) 4080 if (!nskb)
3920 return; 4081 return;
3921 4082
@@ -3929,7 +4090,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3929 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 4090 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
3930 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; 4091 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
3931 __skb_insert(nskb, skb->prev, skb, list); 4092 __skb_insert(nskb, skb->prev, skb, list);
3932 sk_stream_set_owner_r(nskb, sk); 4093 skb_set_owner_r(nskb, sk);
3933 4094
3934 /* Copy data, releasing collapsed skbs. */ 4095 /* Copy data, releasing collapsed skbs. */
3935 while (copy > 0) { 4096 while (copy > 0) {
@@ -4025,9 +4186,9 @@ static int tcp_prune_queue(struct sock *sk)
4025 tcp_collapse_ofo_queue(sk); 4186 tcp_collapse_ofo_queue(sk);
4026 tcp_collapse(sk, &sk->sk_receive_queue, 4187 tcp_collapse(sk, &sk->sk_receive_queue,
4027 sk->sk_receive_queue.next, 4188 sk->sk_receive_queue.next,
4028 (struct sk_buff*)&sk->sk_receive_queue, 4189 (struct sk_buff *)&sk->sk_receive_queue,
4029 tp->copied_seq, tp->rcv_nxt); 4190 tp->copied_seq, tp->rcv_nxt);
4030 sk_stream_mem_reclaim(sk); 4191 sk_mem_reclaim(sk);
4031 4192
4032 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 4193 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4033 return 0; 4194 return 0;
@@ -4047,7 +4208,7 @@ static int tcp_prune_queue(struct sock *sk)
4047 */ 4208 */
4048 if (tcp_is_sack(tp)) 4209 if (tcp_is_sack(tp))
4049 tcp_sack_reset(&tp->rx_opt); 4210 tcp_sack_reset(&tp->rx_opt);
4050 sk_stream_mem_reclaim(sk); 4211 sk_mem_reclaim(sk);
4051 } 4212 }
4052 4213
4053 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 4214 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -4064,7 +4225,6 @@ static int tcp_prune_queue(struct sock *sk)
4064 return -1; 4225 return -1;
4065} 4226}
4066 4227
4067
4068/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. 4228/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
4069 * As additional protections, we do not touch cwnd in retransmission phases, 4229 * As additional protections, we do not touch cwnd in retransmission phases,
4070 * and if application hit its sndbuf limit recently. 4230 * and if application hit its sndbuf limit recently.
@@ -4126,8 +4286,8 @@ static void tcp_new_space(struct sock *sk)
4126 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + 4286 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4127 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), 4287 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
4128 demanded = max_t(unsigned int, tp->snd_cwnd, 4288 demanded = max_t(unsigned int, tp->snd_cwnd,
4129 tp->reordering + 1); 4289 tp->reordering + 1);
4130 sndmem *= 2*demanded; 4290 sndmem *= 2 * demanded;
4131 if (sndmem > sk->sk_sndbuf) 4291 if (sndmem > sk->sk_sndbuf)
4132 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); 4292 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4133 tp->snd_cwnd_stamp = tcp_time_stamp; 4293 tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -4168,8 +4328,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4168 /* We ACK each frame or... */ 4328 /* We ACK each frame or... */
4169 tcp_in_quickack_mode(sk) || 4329 tcp_in_quickack_mode(sk) ||
4170 /* We have out of order data. */ 4330 /* We have out of order data. */
4171 (ofo_possible && 4331 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4172 skb_peek(&tp->out_of_order_queue))) {
4173 /* Then ack it now */ 4332 /* Then ack it now */
4174 tcp_send_ack(sk); 4333 tcp_send_ack(sk);
4175 } else { 4334 } else {
@@ -4197,7 +4356,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
4197 * either form (or just set the sysctl tcp_stdurg). 4356 * either form (or just set the sysctl tcp_stdurg).
4198 */ 4357 */
4199 4358
4200static void tcp_check_urg(struct sock * sk, struct tcphdr * th) 4359static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4201{ 4360{
4202 struct tcp_sock *tp = tcp_sk(sk); 4361 struct tcp_sock *tp = tcp_sk(sk);
4203 u32 ptr = ntohs(th->urg_ptr); 4362 u32 ptr = ntohs(th->urg_ptr);
@@ -4246,8 +4405,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
4246 * buggy users. 4405 * buggy users.
4247 */ 4406 */
4248 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 4407 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4249 !sock_flag(sk, SOCK_URGINLINE) && 4408 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4250 tp->copied_seq != tp->rcv_nxt) {
4251 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 4409 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4252 tp->copied_seq++; 4410 tp->copied_seq++;
4253 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { 4411 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
@@ -4256,8 +4414,8 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
4256 } 4414 }
4257 } 4415 }
4258 4416
4259 tp->urg_data = TCP_URG_NOTYET; 4417 tp->urg_data = TCP_URG_NOTYET;
4260 tp->urg_seq = ptr; 4418 tp->urg_seq = ptr;
4261 4419
4262 /* Disable header prediction. */ 4420 /* Disable header prediction. */
4263 tp->pred_flags = 0; 4421 tp->pred_flags = 0;
@@ -4270,7 +4428,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4270 4428
4271 /* Check if we get a new urgent pointer - normally not. */ 4429 /* Check if we get a new urgent pointer - normally not. */
4272 if (th->urg) 4430 if (th->urg)
4273 tcp_check_urg(sk,th); 4431 tcp_check_urg(sk, th);
4274 4432
4275 /* Do we wait for any urgent data? - normally not... */ 4433 /* Do we wait for any urgent data? - normally not... */
4276 if (tp->urg_data == TCP_URG_NOTYET) { 4434 if (tp->urg_data == TCP_URG_NOTYET) {
@@ -4312,7 +4470,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4312 return err; 4470 return err;
4313} 4471}
4314 4472
4315static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) 4473static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4474 struct sk_buff *skb)
4316{ 4475{
4317 __sum16 result; 4476 __sum16 result;
4318 4477
@@ -4326,14 +4485,16 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
4326 return result; 4485 return result;
4327} 4486}
4328 4487
4329static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) 4488static inline int tcp_checksum_complete_user(struct sock *sk,
4489 struct sk_buff *skb)
4330{ 4490{
4331 return !skb_csum_unnecessary(skb) && 4491 return !skb_csum_unnecessary(skb) &&
4332 __tcp_checksum_complete_user(sk, skb); 4492 __tcp_checksum_complete_user(sk, skb);
4333} 4493}
4334 4494
4335#ifdef CONFIG_NET_DMA 4495#ifdef CONFIG_NET_DMA
4336static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen) 4496static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
4497 int hlen)
4337{ 4498{
4338 struct tcp_sock *tp = tcp_sk(sk); 4499 struct tcp_sock *tp = tcp_sk(sk);
4339 int chunk = skb->len - hlen; 4500 int chunk = skb->len - hlen;
@@ -4349,7 +4510,9 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
4349 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { 4510 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4350 4511
4351 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, 4512 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4352 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); 4513 skb, hlen,
4514 tp->ucopy.iov, chunk,
4515 tp->ucopy.pinned_list);
4353 4516
4354 if (dma_cookie < 0) 4517 if (dma_cookie < 0)
4355 goto out; 4518 goto out;
@@ -4431,7 +4594,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4431 */ 4594 */
4432 4595
4433 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && 4596 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
4434 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { 4597 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4435 int tcp_header_len = tp->tcp_header_len; 4598 int tcp_header_len = tp->tcp_header_len;
4436 4599
4437 /* Timestamp header prediction: tcp_header_len 4600 /* Timestamp header prediction: tcp_header_len
@@ -4500,7 +4663,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4500 eaten = 1; 4663 eaten = 1;
4501 } 4664 }
4502#endif 4665#endif
4503 if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) { 4666 if (tp->ucopy.task == current &&
4667 sock_owned_by_user(sk) && !copied_early) {
4504 __set_current_state(TASK_RUNNING); 4668 __set_current_state(TASK_RUNNING);
4505 4669
4506 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) 4670 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
@@ -4547,9 +4711,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4547 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); 4711 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
4548 4712
4549 /* Bulk data transfer: receiver */ 4713 /* Bulk data transfer: receiver */
4550 __skb_pull(skb,tcp_header_len); 4714 __skb_pull(skb, tcp_header_len);
4551 __skb_queue_tail(&sk->sk_receive_queue, skb); 4715 __skb_queue_tail(&sk->sk_receive_queue, skb);
4552 sk_stream_set_owner_r(skb, sk); 4716 skb_set_owner_r(skb, sk);
4553 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4717 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4554 } 4718 }
4555 4719
@@ -4579,7 +4743,7 @@ no_ack:
4579 } 4743 }
4580 4744
4581slow_path: 4745slow_path:
4582 if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb)) 4746 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
4583 goto csum_error; 4747 goto csum_error;
4584 4748
4585 /* 4749 /*
@@ -4786,7 +4950,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4786 4950
4787 if (!sock_flag(sk, SOCK_DEAD)) { 4951 if (!sock_flag(sk, SOCK_DEAD)) {
4788 sk->sk_state_change(sk); 4952 sk->sk_state_change(sk);
4789 sk_wake_async(sk, 0, POLL_OUT); 4953 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
4790 } 4954 }
4791 4955
4792 if (sk->sk_write_pending || 4956 if (sk->sk_write_pending ||
@@ -4829,7 +4993,8 @@ discard:
4829 } 4993 }
4830 4994
4831 /* PAWS check. */ 4995 /* PAWS check. */
4832 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0)) 4996 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
4997 tcp_paws_check(&tp->rx_opt, 0))
4833 goto discard_and_undo; 4998 goto discard_and_undo;
4834 4999
4835 if (th->syn) { 5000 if (th->syn) {
@@ -4864,7 +5029,6 @@ discard:
4864 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 5029 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4865 tcp_initialize_rcv_mss(sk); 5030 tcp_initialize_rcv_mss(sk);
4866 5031
4867
4868 tcp_send_synack(sk); 5032 tcp_send_synack(sk);
4869#if 0 5033#if 0
4870 /* Note, we could accept data and URG from this segment. 5034 /* Note, we could accept data and URG from this segment.
@@ -4896,7 +5060,6 @@ reset_and_undo:
4896 return 1; 5060 return 1;
4897} 5061}
4898 5062
4899
4900/* 5063/*
4901 * This function implements the receiving procedure of RFC 793 for 5064 * This function implements the receiving procedure of RFC 793 for
4902 * all states except ESTABLISHED and TIME_WAIT. 5065 * all states except ESTABLISHED and TIME_WAIT.
@@ -5016,9 +5179,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5016 * are not waked up, because sk->sk_sleep == 5179 * are not waked up, because sk->sk_sleep ==
5017 * NULL and sk->sk_socket == NULL. 5180 * NULL and sk->sk_socket == NULL.
5018 */ 5181 */
5019 if (sk->sk_socket) { 5182 if (sk->sk_socket)
5020 sk_wake_async(sk,0,POLL_OUT); 5183 sk_wake_async(sk,
5021 } 5184 SOCK_WAKE_IO, POLL_OUT);
5022 5185
5023 tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 5186 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5024 tp->snd_wnd = ntohs(th->window) << 5187 tp->snd_wnd = ntohs(th->window) <<
@@ -5030,8 +5193,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5030 * and does not calculate rtt. 5193 * and does not calculate rtt.
5031 * Fix it at least with timestamps. 5194 * Fix it at least with timestamps.
5032 */ 5195 */
5033 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 5196 if (tp->rx_opt.saw_tstamp &&
5034 !tp->srtt) 5197 tp->rx_opt.rcv_tsecr && !tp->srtt)
5035 tcp_ack_saw_tstamp(sk, 0); 5198 tcp_ack_saw_tstamp(sk, 0);
5036 5199
5037 if (tp->rx_opt.tstamp_ok) 5200 if (tp->rx_opt.tstamp_ok)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad759f1c3777..77c1939a2b0d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -99,7 +99,7 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
99static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 99static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
100 __be32 saddr, __be32 daddr, 100 __be32 saddr, __be32 daddr,
101 struct tcphdr *th, int protocol, 101 struct tcphdr *th, int protocol,
102 int tcplen); 102 unsigned int tcplen);
103#endif 103#endif
104 104
105struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 105struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
369 return; 369 return;
370 } 370 }
371 371
372 sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, 372 sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest,
373 th->source, inet_iif(skb)); 373 iph->saddr, th->source, inet_iif(skb));
374 if (!sk) { 374 if (!sk) {
375 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 375 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
376 return; 376 return;
@@ -858,16 +858,16 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
858 u8 *newkey, u8 newkeylen) 858 u8 *newkey, u8 newkeylen)
859{ 859{
860 /* Add Key to the list */ 860 /* Add Key to the list */
861 struct tcp4_md5sig_key *key; 861 struct tcp_md5sig_key *key;
862 struct tcp_sock *tp = tcp_sk(sk); 862 struct tcp_sock *tp = tcp_sk(sk);
863 struct tcp4_md5sig_key *keys; 863 struct tcp4_md5sig_key *keys;
864 864
865 key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); 865 key = tcp_v4_md5_do_lookup(sk, addr);
866 if (key) { 866 if (key) {
867 /* Pre-existing entry - just update that one. */ 867 /* Pre-existing entry - just update that one. */
868 kfree(key->base.key); 868 kfree(key->key);
869 key->base.key = newkey; 869 key->key = newkey;
870 key->base.keylen = newkeylen; 870 key->keylen = newkeylen;
871 } else { 871 } else {
872 struct tcp_md5sig_info *md5sig; 872 struct tcp_md5sig_info *md5sig;
873 873
@@ -900,8 +900,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
900 sizeof(*keys) * md5sig->entries4); 900 sizeof(*keys) * md5sig->entries4);
901 901
902 /* Free old key list, and reference new one */ 902 /* Free old key list, and reference new one */
903 if (md5sig->keys4) 903 kfree(md5sig->keys4);
904 kfree(md5sig->keys4);
905 md5sig->keys4 = keys; 904 md5sig->keys4 = keys;
906 md5sig->alloced4++; 905 md5sig->alloced4++;
907 } 906 }
@@ -939,10 +938,10 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
939 tp->md5sig_info->alloced4 = 0; 938 tp->md5sig_info->alloced4 = 0;
940 } else if (tp->md5sig_info->entries4 != i) { 939 } else if (tp->md5sig_info->entries4 != i) {
941 /* Need to do some manipulation */ 940 /* Need to do some manipulation */
942 memcpy(&tp->md5sig_info->keys4[i], 941 memmove(&tp->md5sig_info->keys4[i],
943 &tp->md5sig_info->keys4[i+1], 942 &tp->md5sig_info->keys4[i+1],
944 (tp->md5sig_info->entries4 - i) * 943 (tp->md5sig_info->entries4 - i) *
945 sizeof(struct tcp4_md5sig_key)); 944 sizeof(struct tcp4_md5sig_key));
946 } 945 }
947 tcp_free_md5sig_pool(); 946 tcp_free_md5sig_pool();
948 return 0; 947 return 0;
@@ -1021,7 +1020,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1021static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 1020static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1022 __be32 saddr, __be32 daddr, 1021 __be32 saddr, __be32 daddr,
1023 struct tcphdr *th, int protocol, 1022 struct tcphdr *th, int protocol,
1024 int tcplen) 1023 unsigned int tcplen)
1025{ 1024{
1026 struct scatterlist sg[4]; 1025 struct scatterlist sg[4];
1027 __u16 data_len; 1026 __u16 data_len;
@@ -1083,7 +1082,7 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1083 sg_set_buf(&sg[block++], key->key, key->keylen); 1082 sg_set_buf(&sg[block++], key->key, key->keylen);
1084 nbytes += key->keylen; 1083 nbytes += key->keylen;
1085 1084
1086 sg_mark_end(sg, block); 1085 sg_mark_end(&sg[block - 1]);
1087 1086
1088 /* Now store the Hash into the packet */ 1087 /* Now store the Hash into the packet */
1089 err = crypto_hash_init(desc); 1088 err = crypto_hash_init(desc);
@@ -1114,7 +1113,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1114 struct dst_entry *dst, 1113 struct dst_entry *dst,
1115 struct request_sock *req, 1114 struct request_sock *req,
1116 struct tcphdr *th, int protocol, 1115 struct tcphdr *th, int protocol,
1117 int tcplen) 1116 unsigned int tcplen)
1118{ 1117{
1119 __be32 saddr, daddr; 1118 __be32 saddr, daddr;
1120 1119
@@ -1479,7 +1478,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1479 } 1478 }
1480#endif 1479#endif
1481 1480
1482 __inet_hash(&tcp_hashinfo, newsk, 0); 1481 __inet_hash_nolisten(&tcp_hashinfo, newsk);
1483 __inet_inherit_port(&tcp_hashinfo, sk, newsk); 1482 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1484 1483
1485 return newsk; 1484 return newsk;
@@ -1504,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1504 if (req) 1503 if (req)
1505 return tcp_check_req(sk, skb, req, prev); 1504 return tcp_check_req(sk, skb, req, prev);
1506 1505
1507 nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, 1506 nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr,
1508 iph->daddr, th->dest, inet_iif(skb)); 1507 th->source, iph->daddr, th->dest, inet_iif(skb));
1509 1508
1510 if (nsk) { 1509 if (nsk) {
1511 if (nsk->sk_state != TCP_TIME_WAIT) { 1510 if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1662,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
1662 TCP_SKB_CB(skb)->flags = iph->tos; 1661 TCP_SKB_CB(skb)->flags = iph->tos;
1663 TCP_SKB_CB(skb)->sacked = 0; 1662 TCP_SKB_CB(skb)->sacked = 0;
1664 1663
1665 sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, 1664 sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr,
1666 iph->daddr, th->dest, inet_iif(skb)); 1665 th->source, iph->daddr, th->dest, inet_iif(skb));
1667 if (!sk) 1666 if (!sk)
1668 goto no_tcp_socket; 1667 goto no_tcp_socket;
1669 1668
@@ -1736,7 +1735,8 @@ do_time_wait:
1736 } 1735 }
1737 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1736 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1738 case TCP_TW_SYN: { 1737 case TCP_TW_SYN: {
1739 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1738 struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net,
1739 &tcp_hashinfo,
1740 iph->daddr, th->dest, 1740 iph->daddr, th->dest,
1741 inet_iif(skb)); 1741 inet_iif(skb));
1742 if (sk2) { 1742 if (sk2) {
@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq)
2049 struct sock *sk; 2049 struct sock *sk;
2050 struct hlist_node *node; 2050 struct hlist_node *node;
2051 struct inet_timewait_sock *tw; 2051 struct inet_timewait_sock *tw;
2052 rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2052 2053
2053 read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); 2054 read_lock_bh(lock);
2054 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2055 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2055 if (sk->sk_family != st->family) { 2056 if (sk->sk_family != st->family) {
2056 continue; 2057 continue;
@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq)
2067 rc = tw; 2068 rc = tw;
2068 goto out; 2069 goto out;
2069 } 2070 }
2070 read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); 2071 read_unlock_bh(lock);
2071 st->state = TCP_SEQ_STATE_ESTABLISHED; 2072 st->state = TCP_SEQ_STATE_ESTABLISHED;
2072 } 2073 }
2073out: 2074out:
@@ -2094,11 +2095,11 @@ get_tw:
2094 cur = tw; 2095 cur = tw;
2095 goto out; 2096 goto out;
2096 } 2097 }
2097 read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); 2098 read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2098 st->state = TCP_SEQ_STATE_ESTABLISHED; 2099 st->state = TCP_SEQ_STATE_ESTABLISHED;
2099 2100
2100 if (++st->bucket < tcp_hashinfo.ehash_size) { 2101 if (++st->bucket < tcp_hashinfo.ehash_size) {
2101 read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); 2102 read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2102 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); 2103 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2103 } else { 2104 } else {
2104 cur = NULL; 2105 cur = NULL;
@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2206 case TCP_SEQ_STATE_TIME_WAIT: 2207 case TCP_SEQ_STATE_TIME_WAIT:
2207 case TCP_SEQ_STATE_ESTABLISHED: 2208 case TCP_SEQ_STATE_ESTABLISHED:
2208 if (v) 2209 if (v)
2209 read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); 2210 read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2210 break; 2211 break;
2211 } 2212 }
2212} 2213}
@@ -2417,6 +2418,8 @@ void tcp4_proc_exit(void)
2417} 2418}
2418#endif /* CONFIG_PROC_FS */ 2419#endif /* CONFIG_PROC_FS */
2419 2420
2421DEFINE_PROTO_INUSE(tcp)
2422
2420struct proto tcp_prot = { 2423struct proto tcp_prot = {
2421 .name = "TCP", 2424 .name = "TCP",
2422 .owner = THIS_MODULE, 2425 .owner = THIS_MODULE,
@@ -2451,6 +2454,7 @@ struct proto tcp_prot = {
2451 .compat_setsockopt = compat_tcp_setsockopt, 2454 .compat_setsockopt = compat_tcp_setsockopt,
2452 .compat_getsockopt = compat_tcp_getsockopt, 2455 .compat_getsockopt = compat_tcp_getsockopt,
2453#endif 2456#endif
2457 REF_PROTO_INUSE(tcp)
2454}; 2458};
2455 2459
2456void __init tcp_v4_init(struct net_proto_family *ops) 2460void __init tcp_v4_init(struct net_proto_family *ops)
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index e7f5ef92cbd8..ce3c41ff50b2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,12 +115,12 @@ static void tcp_lp_init(struct sock *sk)
115 * Will only call newReno CA when away from inference. 115 * Will only call newReno CA when away from inference.
116 * From TCP-LP's paper, this will be handled in additive increasement. 116 * From TCP-LP's paper, this will be handled in additive increasement.
117 */ 117 */
118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) 118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
119{ 119{
120 struct lp *lp = inet_csk_ca(sk); 120 struct lp *lp = inet_csk_ca(sk);
121 121
122 if (!(lp->flag & LP_WITHIN_INF)) 122 if (!(lp->flag & LP_WITHIN_INF))
123 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 123 tcp_reno_cong_avoid(sk, ack, in_flight);
124} 124}
125 125
126/** 126/**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 324b4207254a..ed750f9ceb07 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -61,27 +61,24 @@ int sysctl_tcp_base_mss __read_mostly = 512;
61/* By default, RFC2861 behavior. */ 61/* By default, RFC2861 behavior. */
62int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 62int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
63 63
64static inline void tcp_packets_out_inc(struct sock *sk, 64static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
65 const struct sk_buff *skb)
66{ 65{
67 struct tcp_sock *tp = tcp_sk(sk); 66 struct tcp_sock *tp = tcp_sk(sk);
68 int orig = tp->packets_out; 67 unsigned int prior_packets = tp->packets_out;
68
69 tcp_advance_send_head(sk, skb);
70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
71
72 /* Don't override Nagle indefinately with F-RTO */
73 if (tp->frto_counter == 2)
74 tp->frto_counter = 3;
69 75
70 tp->packets_out += tcp_skb_pcount(skb); 76 tp->packets_out += tcp_skb_pcount(skb);
71 if (!orig) 77 if (!prior_packets)
72 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 78 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
73 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 79 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
74} 80}
75 81
76static void update_send_head(struct sock *sk, struct sk_buff *skb)
77{
78 struct tcp_sock *tp = tcp_sk(sk);
79
80 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82 tcp_packets_out_inc(sk, skb);
83}
84
85/* SND.NXT, if window was not shrunk. 82/* SND.NXT, if window was not shrunk.
86 * If window has been shrunk, what should we make? It is not clear at all. 83 * If window has been shrunk, what should we make? It is not clear at all.
87 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( 84 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
@@ -92,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
92{ 89{
93 struct tcp_sock *tp = tcp_sk(sk); 90 struct tcp_sock *tp = tcp_sk(sk);
94 91
95 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) 92 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
96 return tp->snd_nxt; 93 return tp->snd_nxt;
97 else 94 else
98 return tp->snd_una+tp->snd_wnd; 95 return tcp_wnd_end(tp);
99} 96}
100 97
101/* Calculate mss to advertise in SYN segment. 98/* Calculate mss to advertise in SYN segment.
@@ -224,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
224 * following RFC2414. Senders, not following this RFC, 221 * following RFC2414. Senders, not following this RFC,
225 * will be satisfied with 2. 222 * will be satisfied with 2.
226 */ 223 */
227 if (mss > (1<<*rcv_wscale)) { 224 if (mss > (1 << *rcv_wscale)) {
228 int init_cwnd = 4; 225 int init_cwnd = 4;
229 if (mss > 1460*3) 226 if (mss > 1460 * 3)
230 init_cwnd = 2; 227 init_cwnd = 2;
231 else if (mss > 1460) 228 else if (mss > 1460)
232 init_cwnd = 3; 229 init_cwnd = 3;
233 if (*rcv_wnd > init_cwnd*mss) 230 if (*rcv_wnd > init_cwnd * mss)
234 *rcv_wnd = init_cwnd*mss; 231 *rcv_wnd = init_cwnd * mss;
235 } 232 }
236 233
237 /* Set the clamp no higher than max representable value */ 234 /* Set the clamp no higher than max representable value */
@@ -281,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk)
281 return new_win; 278 return new_win;
282} 279}
283 280
284static inline void TCP_ECN_send_synack(struct tcp_sock *tp, 281static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
285 struct sk_buff *skb)
286{ 282{
287 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 283 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
288 if (!(tp->ecn_flags&TCP_ECN_OK)) 284 if (!(tp->ecn_flags & TCP_ECN_OK))
289 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 285 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
290} 286}
291 287
@@ -295,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
295 291
296 tp->ecn_flags = 0; 292 tp->ecn_flags = 0;
297 if (sysctl_tcp_ecn) { 293 if (sysctl_tcp_ecn) {
298 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; 294 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
299 tp->ecn_flags = TCP_ECN_OK; 295 tp->ecn_flags = TCP_ECN_OK;
300 } 296 }
301} 297}
@@ -317,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
317 if (skb->len != tcp_header_len && 313 if (skb->len != tcp_header_len &&
318 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { 314 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
319 INET_ECN_xmit(sk); 315 INET_ECN_xmit(sk);
320 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { 316 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
321 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 317 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
322 tcp_hdr(skb)->cwr = 1; 318 tcp_hdr(skb)->cwr = 1;
323 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 319 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
@@ -331,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
331 } 327 }
332} 328}
333 329
330/* Constructs common control bits of non-data skb. If SYN/FIN is present,
331 * auto increment end seqno.
332 */
333static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
334{
335 skb->csum = 0;
336
337 TCP_SKB_CB(skb)->flags = flags;
338 TCP_SKB_CB(skb)->sacked = 0;
339
340 skb_shinfo(skb)->gso_segs = 1;
341 skb_shinfo(skb)->gso_size = 0;
342 skb_shinfo(skb)->gso_type = 0;
343
344 TCP_SKB_CB(skb)->seq = seq;
345 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
346 seq++;
347 TCP_SKB_CB(skb)->end_seq = seq;
348}
349
334static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, 350static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
335 __u32 tstamp, __u8 **md5_hash) 351 __u32 tstamp, __u8 **md5_hash)
336{ 352{
@@ -434,7 +450,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
434 (TCPOPT_NOP << 16) | 450 (TCPOPT_NOP << 16) |
435 (TCPOPT_MD5SIG << 8) | 451 (TCPOPT_MD5SIG << 8) |
436 TCPOLEN_MD5SIG); 452 TCPOLEN_MD5SIG);
437 *md5_hash = (__u8 *) ptr; 453 *md5_hash = (__u8 *)ptr;
438 } 454 }
439#endif 455#endif
440} 456}
@@ -450,7 +466,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
450 * We are working here with either a clone of the original 466 * We are working here with either a clone of the original
451 * SKB, or a fresh unique copy made by the retransmit engine. 467 * SKB, or a fresh unique copy made by the retransmit engine.
452 */ 468 */
453static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask) 469static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
470 gfp_t gfp_mask)
454{ 471{
455 const struct inet_connection_sock *icsk = inet_csk(sk); 472 const struct inet_connection_sock *icsk = inet_csk(sk);
456 struct inet_sock *inet; 473 struct inet_sock *inet;
@@ -554,8 +571,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
554 th->urg_ptr = 0; 571 th->urg_ptr = 0;
555 572
556 if (unlikely(tp->urg_mode && 573 if (unlikely(tp->urg_mode &&
557 between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { 574 between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
558 th->urg_ptr = htons(tp->snd_up-tcb->seq); 575 th->urg_ptr = htons(tp->snd_up - tcb->seq);
559 th->urg = 1; 576 th->urg = 1;
560 } 577 }
561 578
@@ -619,7 +636,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
619#undef SYSCTL_FLAG_SACK 636#undef SYSCTL_FLAG_SACK
620} 637}
621 638
622
623/* This routine just queue's the buffer 639/* This routine just queue's the buffer
624 * 640 *
625 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, 641 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -633,10 +649,12 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
633 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 649 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
634 skb_header_release(skb); 650 skb_header_release(skb);
635 tcp_add_write_queue_tail(sk, skb); 651 tcp_add_write_queue_tail(sk, skb);
636 sk_charge_skb(sk, skb); 652 sk->sk_wmem_queued += skb->truesize;
653 sk_mem_charge(sk, skb->truesize);
637} 654}
638 655
639static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 656static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
657 unsigned int mss_now)
640{ 658{
641 if (skb->len <= mss_now || !sk_can_gso(sk)) { 659 if (skb->len <= mss_now || !sk_can_gso(sk)) {
642 /* Avoid the costly divide in the normal 660 /* Avoid the costly divide in the normal
@@ -653,23 +671,18 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
653} 671}
654 672
655/* When a modification to fackets out becomes necessary, we need to check 673/* When a modification to fackets out becomes necessary, we need to check
656 * skb is counted to fackets_out or not. Another important thing is to 674 * skb is counted to fackets_out or not.
657 * tweak SACK fastpath hint too as it would overwrite all changes unless
658 * hint is also changed.
659 */ 675 */
660static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb, 676static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
661 int decr) 677 int decr)
662{ 678{
679 struct tcp_sock *tp = tcp_sk(sk);
680
663 if (!tp->sacked_out || tcp_is_reno(tp)) 681 if (!tp->sacked_out || tcp_is_reno(tp))
664 return; 682 return;
665 683
666 if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq)) 684 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
667 tp->fackets_out -= decr; 685 tp->fackets_out -= decr;
668
669 /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
670 if (tp->fastpath_skb_hint != NULL &&
671 after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
672 tp->fastpath_cnt_hint -= decr;
673} 686}
674 687
675/* Function to create two new TCP segments. Shrinks the given segment 688/* Function to create two new TCP segments. Shrinks the given segment
@@ -677,7 +690,8 @@ static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
677 * packet to the list. This won't be called frequently, I hope. 690 * packet to the list. This won't be called frequently, I hope.
678 * Remember, these are still headerless SKBs at this point. 691 * Remember, these are still headerless SKBs at this point.
679 */ 692 */
680int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) 693int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
694 unsigned int mss_now)
681{ 695{
682 struct tcp_sock *tp = tcp_sk(sk); 696 struct tcp_sock *tp = tcp_sk(sk);
683 struct sk_buff *buff; 697 struct sk_buff *buff;
@@ -702,7 +716,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
702 if (buff == NULL) 716 if (buff == NULL)
703 return -ENOMEM; /* We'll just try again later. */ 717 return -ENOMEM; /* We'll just try again later. */
704 718
705 sk_charge_skb(sk, buff); 719 sk->sk_wmem_queued += buff->truesize;
720 sk_mem_charge(sk, buff->truesize);
706 nlen = skb->len - len - nsize; 721 nlen = skb->len - len - nsize;
707 buff->truesize += nlen; 722 buff->truesize += nlen;
708 skb->truesize -= nlen; 723 skb->truesize -= nlen;
@@ -712,20 +727,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
712 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; 727 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
713 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 728 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
714 729
715 if (tcp_is_sack(tp) && tp->sacked_out &&
716 (TCP_SKB_CB(skb)->seq == tp->highest_sack))
717 tp->highest_sack = TCP_SKB_CB(buff)->seq;
718
719 /* PSH and FIN should only be set in the second packet. */ 730 /* PSH and FIN should only be set in the second packet. */
720 flags = TCP_SKB_CB(skb)->flags; 731 flags = TCP_SKB_CB(skb)->flags;
721 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 732 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
722 TCP_SKB_CB(buff)->flags = flags; 733 TCP_SKB_CB(buff)->flags = flags;
723 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 734 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
724 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
725 735
726 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 736 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
727 /* Copy and checksum data tail into the new buffer. */ 737 /* Copy and checksum data tail into the new buffer. */
728 buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), 738 buff->csum = csum_partial_copy_nocheck(skb->data + len,
739 skb_put(buff, nsize),
729 nsize, 0); 740 nsize, 0);
730 741
731 skb_trim(skb, len); 742 skb_trim(skb, len);
@@ -772,7 +783,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
772 tcp_dec_pcount_approx_int(&tp->sacked_out, diff); 783 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
773 tcp_verify_left_out(tp); 784 tcp_verify_left_out(tp);
774 } 785 }
775 tcp_adjust_fackets_out(tp, skb, diff); 786 tcp_adjust_fackets_out(sk, skb, diff);
776 } 787 }
777 788
778 /* Link BUFF into the send queue. */ 789 /* Link BUFF into the send queue. */
@@ -792,7 +803,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
792 803
793 eat = len; 804 eat = len;
794 k = 0; 805 k = 0;
795 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 806 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
796 if (skb_shinfo(skb)->frags[i].size <= eat) { 807 if (skb_shinfo(skb)->frags[i].size <= eat) {
797 put_page(skb_shinfo(skb)->frags[i].page); 808 put_page(skb_shinfo(skb)->frags[i].page);
798 eat -= skb_shinfo(skb)->frags[i].size; 809 eat -= skb_shinfo(skb)->frags[i].size;
@@ -815,8 +826,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
815 826
816int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) 827int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
817{ 828{
818 if (skb_cloned(skb) && 829 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
819 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
820 return -ENOMEM; 830 return -ENOMEM;
821 831
822 /* If len == headlen, we avoid __skb_pull to preserve alignment. */ 832 /* If len == headlen, we avoid __skb_pull to preserve alignment. */
@@ -830,7 +840,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
830 840
831 skb->truesize -= len; 841 skb->truesize -= len;
832 sk->sk_wmem_queued -= len; 842 sk->sk_wmem_queued -= len;
833 sk->sk_forward_alloc += len; 843 sk_mem_uncharge(sk, len);
834 sock_set_flag(sk, SOCK_QUEUE_SHRUNK); 844 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
835 845
836 /* Any change of skb->len requires recalculation of tso 846 /* Any change of skb->len requires recalculation of tso
@@ -898,6 +908,15 @@ void tcp_mtup_init(struct sock *sk)
898 icsk->icsk_mtup.probe_size = 0; 908 icsk->icsk_mtup.probe_size = 0;
899} 909}
900 910
911/* Bound MSS / TSO packet size with the half of the window */
912static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
913{
914 if (tp->max_window && pktsize > (tp->max_window >> 1))
915 return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
916 else
917 return pktsize;
918}
919
901/* This function synchronize snd mss to current pmtu/exthdr set. 920/* This function synchronize snd mss to current pmtu/exthdr set.
902 921
903 tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts 922 tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -920,7 +939,6 @@ void tcp_mtup_init(struct sock *sk)
920 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache 939 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
921 are READ ONLY outside this function. --ANK (980731) 940 are READ ONLY outside this function. --ANK (980731)
922 */ 941 */
923
924unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 942unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
925{ 943{
926 struct tcp_sock *tp = tcp_sk(sk); 944 struct tcp_sock *tp = tcp_sk(sk);
@@ -931,10 +949,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
931 icsk->icsk_mtup.search_high = pmtu; 949 icsk->icsk_mtup.search_high = pmtu;
932 950
933 mss_now = tcp_mtu_to_mss(sk, pmtu); 951 mss_now = tcp_mtu_to_mss(sk, pmtu);
934 952 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
935 /* Bound mss with half of window */
936 if (tp->max_window && mss_now > (tp->max_window>>1))
937 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
938 953
939 /* And store cached results */ 954 /* And store cached results */
940 icsk->icsk_pmtu_cookie = pmtu; 955 icsk->icsk_pmtu_cookie = pmtu;
@@ -988,11 +1003,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
988 inet_csk(sk)->icsk_ext_hdr_len - 1003 inet_csk(sk)->icsk_ext_hdr_len -
989 tp->tcp_header_len); 1004 tp->tcp_header_len);
990 1005
991 if (tp->max_window && 1006 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
992 (xmit_size_goal > (tp->max_window >> 1)))
993 xmit_size_goal = max((tp->max_window >> 1),
994 68U - tp->tcp_header_len);
995
996 xmit_size_goal -= (xmit_size_goal % mss_now); 1007 xmit_size_goal -= (xmit_size_goal % mss_now);
997 } 1008 }
998 tp->xmit_size_goal = xmit_size_goal; 1009 tp->xmit_size_goal = xmit_size_goal;
@@ -1001,13 +1012,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
1001} 1012}
1002 1013
1003/* Congestion window validation. (RFC2861) */ 1014/* Congestion window validation. (RFC2861) */
1004
1005static void tcp_cwnd_validate(struct sock *sk) 1015static void tcp_cwnd_validate(struct sock *sk)
1006{ 1016{
1007 struct tcp_sock *tp = tcp_sk(sk); 1017 struct tcp_sock *tp = tcp_sk(sk);
1008 __u32 packets_out = tp->packets_out;
1009 1018
1010 if (packets_out >= tp->snd_cwnd) { 1019 if (tp->packets_out >= tp->snd_cwnd) {
1011 /* Network is feed fully. */ 1020 /* Network is feed fully. */
1012 tp->snd_cwnd_used = 0; 1021 tp->snd_cwnd_used = 0;
1013 tp->snd_cwnd_stamp = tcp_time_stamp; 1022 tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1022,19 +1031,35 @@ static void tcp_cwnd_validate(struct sock *sk)
1022 } 1031 }
1023} 1032}
1024 1033
1025static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) 1034/* Returns the portion of skb which can be sent right away without
1035 * introducing MSS oddities to segment boundaries. In rare cases where
1036 * mss_now != mss_cache, we will request caller to create a small skb
1037 * per input skb which could be mostly avoided here (if desired).
1038 */
1039static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
1040 unsigned int mss_now, unsigned int cwnd)
1026{ 1041{
1027 u32 window, cwnd_len; 1042 struct tcp_sock *tp = tcp_sk(sk);
1043 u32 needed, window, cwnd_len;
1028 1044
1029 window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq); 1045 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1030 cwnd_len = mss_now * cwnd; 1046 cwnd_len = mss_now * cwnd;
1031 return min(window, cwnd_len); 1047
1048 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1049 return cwnd_len;
1050
1051 if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
1052 return cwnd_len;
1053
1054 needed = min(skb->len, window);
1055 return needed - needed % mss_now;
1032} 1056}
1033 1057
1034/* Can at least one segment of SKB be sent right now, according to the 1058/* Can at least one segment of SKB be sent right now, according to the
1035 * congestion window rules? If so, return how many segments are allowed. 1059 * congestion window rules? If so, return how many segments are allowed.
1036 */ 1060 */
1037static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb) 1061static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1062 struct sk_buff *skb)
1038{ 1063{
1039 u32 in_flight, cwnd; 1064 u32 in_flight, cwnd;
1040 1065
@@ -1054,13 +1079,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
1054/* This must be invoked the first time we consider transmitting 1079/* This must be invoked the first time we consider transmitting
1055 * SKB onto the wire. 1080 * SKB onto the wire.
1056 */ 1081 */
1057static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 1082static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
1083 unsigned int mss_now)
1058{ 1084{
1059 int tso_segs = tcp_skb_pcount(skb); 1085 int tso_segs = tcp_skb_pcount(skb);
1060 1086
1061 if (!tso_segs || 1087 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1062 (tso_segs > 1 &&
1063 tcp_skb_mss(skb) != mss_now)) {
1064 tcp_set_skb_tso_segs(sk, skb, mss_now); 1088 tcp_set_skb_tso_segs(sk, skb, mss_now);
1065 tso_segs = tcp_skb_pcount(skb); 1089 tso_segs = tcp_skb_pcount(skb);
1066 } 1090 }
@@ -1080,16 +1104,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
1080 * 4. Or TCP_CORK is not set, and all sent packets are ACKed. 1104 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1081 * With Minshall's modification: all sent small packets are ACKed. 1105 * With Minshall's modification: all sent small packets are ACKed.
1082 */ 1106 */
1083
1084static inline int tcp_nagle_check(const struct tcp_sock *tp, 1107static inline int tcp_nagle_check(const struct tcp_sock *tp,
1085 const struct sk_buff *skb, 1108 const struct sk_buff *skb,
1086 unsigned mss_now, int nonagle) 1109 unsigned mss_now, int nonagle)
1087{ 1110{
1088 return (skb->len < mss_now && 1111 return (skb->len < mss_now &&
1089 ((nonagle&TCP_NAGLE_CORK) || 1112 ((nonagle & TCP_NAGLE_CORK) ||
1090 (!nonagle && 1113 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
1091 tp->packets_out &&
1092 tcp_minshall_check(tp))));
1093} 1114}
1094 1115
1095/* Return non-zero if the Nagle test allows this packet to be 1116/* Return non-zero if the Nagle test allows this packet to be
@@ -1121,14 +1142,15 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1121} 1142}
1122 1143
1123/* Does at least the first segment of SKB fit into the send window? */ 1144/* Does at least the first segment of SKB fit into the send window? */
1124static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss) 1145static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
1146 unsigned int cur_mss)
1125{ 1147{
1126 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 1148 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1127 1149
1128 if (skb->len > cur_mss) 1150 if (skb->len > cur_mss)
1129 end_seq = TCP_SKB_CB(skb)->seq + cur_mss; 1151 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1130 1152
1131 return !after(end_seq, tp->snd_una + tp->snd_wnd); 1153 return !after(end_seq, tcp_wnd_end(tp));
1132} 1154}
1133 1155
1134/* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) 1156/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
@@ -1147,8 +1169,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1147 return 0; 1169 return 0;
1148 1170
1149 cwnd_quota = tcp_cwnd_test(tp, skb); 1171 cwnd_quota = tcp_cwnd_test(tp, skb);
1150 if (cwnd_quota && 1172 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1151 !tcp_snd_wnd_test(tp, skb, cur_mss))
1152 cwnd_quota = 0; 1173 cwnd_quota = 0;
1153 1174
1154 return cwnd_quota; 1175 return cwnd_quota;
@@ -1162,8 +1183,7 @@ int tcp_may_send_now(struct sock *sk)
1162 return (skb && 1183 return (skb &&
1163 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), 1184 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
1164 (tcp_skb_is_last(sk, skb) ? 1185 (tcp_skb_is_last(sk, skb) ?
1165 TCP_NAGLE_PUSH : 1186 tp->nonagle : TCP_NAGLE_PUSH)));
1166 tp->nonagle)));
1167} 1187}
1168 1188
1169/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet 1189/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -1173,7 +1193,8 @@ int tcp_may_send_now(struct sock *sk)
1173 * know that all the data is in scatter-gather pages, and that the 1193 * know that all the data is in scatter-gather pages, and that the
1174 * packet has never been sent out before (and thus is not cloned). 1194 * packet has never been sent out before (and thus is not cloned).
1175 */ 1195 */
1176static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) 1196static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1197 unsigned int mss_now)
1177{ 1198{
1178 struct sk_buff *buff; 1199 struct sk_buff *buff;
1179 int nlen = skb->len - len; 1200 int nlen = skb->len - len;
@@ -1183,11 +1204,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1183 if (skb->len != skb->data_len) 1204 if (skb->len != skb->data_len)
1184 return tcp_fragment(sk, skb, len, mss_now); 1205 return tcp_fragment(sk, skb, len, mss_now);
1185 1206
1186 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); 1207 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
1187 if (unlikely(buff == NULL)) 1208 if (unlikely(buff == NULL))
1188 return -ENOMEM; 1209 return -ENOMEM;
1189 1210
1190 sk_charge_skb(sk, buff); 1211 sk->sk_wmem_queued += buff->truesize;
1212 sk_mem_charge(sk, buff->truesize);
1191 buff->truesize += nlen; 1213 buff->truesize += nlen;
1192 skb->truesize -= nlen; 1214 skb->truesize -= nlen;
1193 1215
@@ -1198,7 +1220,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1198 1220
1199 /* PSH and FIN should only be set in the second packet. */ 1221 /* PSH and FIN should only be set in the second packet. */
1200 flags = TCP_SKB_CB(skb)->flags; 1222 flags = TCP_SKB_CB(skb)->flags;
1201 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1223 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
1202 TCP_SKB_CB(buff)->flags = flags; 1224 TCP_SKB_CB(buff)->flags = flags;
1203 1225
1204 /* This packet was never sent out yet, so no SACK bits. */ 1226 /* This packet was never sent out yet, so no SACK bits. */
@@ -1236,15 +1258,15 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1236 goto send_now; 1258 goto send_now;
1237 1259
1238 /* Defer for less than two clock ticks. */ 1260 /* Defer for less than two clock ticks. */
1239 if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1) 1261 if (tp->tso_deferred &&
1262 ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1240 goto send_now; 1263 goto send_now;
1241 1264
1242 in_flight = tcp_packets_in_flight(tp); 1265 in_flight = tcp_packets_in_flight(tp);
1243 1266
1244 BUG_ON(tcp_skb_pcount(skb) <= 1 || 1267 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1245 (tp->snd_cwnd <= in_flight));
1246 1268
1247 send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq; 1269 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1248 1270
1249 /* From in_flight test above, we know that cwnd > in_flight. */ 1271 /* From in_flight test above, we know that cwnd > in_flight. */
1250 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; 1272 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
@@ -1275,7 +1297,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1275 } 1297 }
1276 1298
1277 /* Ok, it looks like it is advisable to defer. */ 1299 /* Ok, it looks like it is advisable to defer. */
1278 tp->tso_deferred = 1 | (jiffies<<1); 1300 tp->tso_deferred = 1 | (jiffies << 1);
1279 1301
1280 return 1; 1302 return 1;
1281 1303
@@ -1287,7 +1309,8 @@ send_now:
1287/* Create a new MTU probe if we are ready. 1309/* Create a new MTU probe if we are ready.
1288 * Returns 0 if we should wait to probe (no cwnd available), 1310 * Returns 0 if we should wait to probe (no cwnd available),
1289 * 1 if a probe was sent, 1311 * 1 if a probe was sent,
1290 * -1 otherwise */ 1312 * -1 otherwise
1313 */
1291static int tcp_mtu_probe(struct sock *sk) 1314static int tcp_mtu_probe(struct sock *sk)
1292{ 1315{
1293 struct tcp_sock *tp = tcp_sk(sk); 1316 struct tcp_sock *tp = tcp_sk(sk);
@@ -1295,7 +1318,7 @@ static int tcp_mtu_probe(struct sock *sk)
1295 struct sk_buff *skb, *nskb, *next; 1318 struct sk_buff *skb, *nskb, *next;
1296 int len; 1319 int len;
1297 int probe_size; 1320 int probe_size;
1298 unsigned int pif; 1321 int size_needed;
1299 int copy; 1322 int copy;
1300 int mss_now; 1323 int mss_now;
1301 1324
@@ -1312,34 +1335,25 @@ static int tcp_mtu_probe(struct sock *sk)
1312 1335
1313 /* Very simple search strategy: just double the MSS. */ 1336 /* Very simple search strategy: just double the MSS. */
1314 mss_now = tcp_current_mss(sk, 0); 1337 mss_now = tcp_current_mss(sk, 0);
1315 probe_size = 2*tp->mss_cache; 1338 probe_size = 2 * tp->mss_cache;
1339 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1316 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { 1340 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1317 /* TODO: set timer for probe_converge_event */ 1341 /* TODO: set timer for probe_converge_event */
1318 return -1; 1342 return -1;
1319 } 1343 }
1320 1344
1321 /* Have enough data in the send queue to probe? */ 1345 /* Have enough data in the send queue to probe? */
1322 len = 0; 1346 if (tp->write_seq - tp->snd_nxt < size_needed)
1323 if ((skb = tcp_send_head(sk)) == NULL)
1324 return -1;
1325 while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
1326 skb = tcp_write_queue_next(sk, skb);
1327 if (len < probe_size)
1328 return -1; 1347 return -1;
1329 1348
1330 /* Receive window check. */ 1349 if (tp->snd_wnd < size_needed)
1331 if (after(TCP_SKB_CB(skb)->seq + probe_size, tp->snd_una + tp->snd_wnd)) { 1350 return -1;
1332 if (tp->snd_wnd < probe_size) 1351 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1333 return -1; 1352 return 0;
1334 else
1335 return 0;
1336 }
1337 1353
1338 /* Do we need to wait to drain cwnd? */ 1354 /* Do we need to wait to drain cwnd? With none in flight, don't stall */
1339 pif = tcp_packets_in_flight(tp); 1355 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1340 if (pif + 2 > tp->snd_cwnd) { 1356 if (!tcp_packets_in_flight(tp))
1341 /* With no packets in flight, don't stall. */
1342 if (pif == 0)
1343 return -1; 1357 return -1;
1344 else 1358 else
1345 return 0; 1359 return 0;
@@ -1348,11 +1362,10 @@ static int tcp_mtu_probe(struct sock *sk)
1348 /* We're allowed to probe. Build it now. */ 1362 /* We're allowed to probe. Build it now. */
1349 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) 1363 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1350 return -1; 1364 return -1;
1351 sk_charge_skb(sk, nskb); 1365 sk->sk_wmem_queued += nskb->truesize;
1366 sk_mem_charge(sk, nskb->truesize);
1352 1367
1353 skb = tcp_send_head(sk); 1368 skb = tcp_send_head(sk);
1354 tcp_insert_write_queue_before(nskb, skb, sk);
1355 tcp_advance_send_head(sk, skb);
1356 1369
1357 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1370 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1358 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1371 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1361,30 +1374,32 @@ static int tcp_mtu_probe(struct sock *sk)
1361 nskb->csum = 0; 1374 nskb->csum = 0;
1362 nskb->ip_summed = skb->ip_summed; 1375 nskb->ip_summed = skb->ip_summed;
1363 1376
1364 len = 0; 1377 tcp_insert_write_queue_before(nskb, skb, sk);
1365 while (len < probe_size) {
1366 next = tcp_write_queue_next(sk, skb);
1367 1378
1379 len = 0;
1380 tcp_for_write_queue_from_safe(skb, next, sk) {
1368 copy = min_t(int, skb->len, probe_size - len); 1381 copy = min_t(int, skb->len, probe_size - len);
1369 if (nskb->ip_summed) 1382 if (nskb->ip_summed)
1370 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); 1383 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1371 else 1384 else
1372 nskb->csum = skb_copy_and_csum_bits(skb, 0, 1385 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1373 skb_put(nskb, copy), copy, nskb->csum); 1386 skb_put(nskb, copy),
1387 copy, nskb->csum);
1374 1388
1375 if (skb->len <= copy) { 1389 if (skb->len <= copy) {
1376 /* We've eaten all the data from this skb. 1390 /* We've eaten all the data from this skb.
1377 * Throw it away. */ 1391 * Throw it away. */
1378 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; 1392 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1379 tcp_unlink_write_queue(skb, sk); 1393 tcp_unlink_write_queue(skb, sk);
1380 sk_stream_free_skb(sk, skb); 1394 sk_wmem_free_skb(sk, skb);
1381 } else { 1395 } else {
1382 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1396 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1383 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1397 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
1384 if (!skb_shinfo(skb)->nr_frags) { 1398 if (!skb_shinfo(skb)->nr_frags) {
1385 skb_pull(skb, copy); 1399 skb_pull(skb, copy);
1386 if (skb->ip_summed != CHECKSUM_PARTIAL) 1400 if (skb->ip_summed != CHECKSUM_PARTIAL)
1387 skb->csum = csum_partial(skb->data, skb->len, 0); 1401 skb->csum = csum_partial(skb->data,
1402 skb->len, 0);
1388 } else { 1403 } else {
1389 __pskb_trim_head(skb, copy); 1404 __pskb_trim_head(skb, copy);
1390 tcp_set_skb_tso_segs(sk, skb, mss_now); 1405 tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1393,7 +1408,9 @@ static int tcp_mtu_probe(struct sock *sk)
1393 } 1408 }
1394 1409
1395 len += copy; 1410 len += copy;
1396 skb = next; 1411
1412 if (len >= probe_size)
1413 break;
1397 } 1414 }
1398 tcp_init_tso_segs(sk, nskb, nskb->len); 1415 tcp_init_tso_segs(sk, nskb, nskb->len);
1399 1416
@@ -1402,9 +1419,9 @@ static int tcp_mtu_probe(struct sock *sk)
1402 TCP_SKB_CB(nskb)->when = tcp_time_stamp; 1419 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1403 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { 1420 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1404 /* Decrement cwnd here because we are sending 1421 /* Decrement cwnd here because we are sending
1405 * effectively two packets. */ 1422 * effectively two packets. */
1406 tp->snd_cwnd--; 1423 tp->snd_cwnd--;
1407 update_send_head(sk, nskb); 1424 tcp_event_new_data_sent(sk, nskb);
1408 1425
1409 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); 1426 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1410 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; 1427 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1416,7 +1433,6 @@ static int tcp_mtu_probe(struct sock *sk)
1416 return -1; 1433 return -1;
1417} 1434}
1418 1435
1419
1420/* This routine writes packets to the network. It advances the 1436/* This routine writes packets to the network. It advances the
1421 * send_head. This happens as incoming acks open up the remote 1437 * send_head. This happens as incoming acks open up the remote
1422 * window for us. 1438 * window for us.
@@ -1472,17 +1488,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1472 } 1488 }
1473 1489
1474 limit = mss_now; 1490 limit = mss_now;
1475 if (tso_segs > 1) { 1491 if (tso_segs > 1)
1476 limit = tcp_window_allows(tp, skb, 1492 limit = tcp_mss_split_point(sk, skb, mss_now,
1477 mss_now, cwnd_quota); 1493 cwnd_quota);
1478
1479 if (skb->len < limit) {
1480 unsigned int trim = skb->len % mss_now;
1481
1482 if (trim)
1483 limit = skb->len - trim;
1484 }
1485 }
1486 1494
1487 if (skb->len > limit && 1495 if (skb->len > limit &&
1488 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1496 unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1496,7 +1504,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1496 /* Advance the send_head. This one is sent out. 1504 /* Advance the send_head. This one is sent out.
1497 * This call will increment packets_out. 1505 * This call will increment packets_out.
1498 */ 1506 */
1499 update_send_head(sk, skb); 1507 tcp_event_new_data_sent(sk, skb);
1500 1508
1501 tcp_minshall_update(tp, mss_now, skb); 1509 tcp_minshall_update(tp, mss_now, skb);
1502 sent_pkts++; 1510 sent_pkts++;
@@ -1529,7 +1537,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1529 */ 1537 */
1530void tcp_push_one(struct sock *sk, unsigned int mss_now) 1538void tcp_push_one(struct sock *sk, unsigned int mss_now)
1531{ 1539{
1532 struct tcp_sock *tp = tcp_sk(sk);
1533 struct sk_buff *skb = tcp_send_head(sk); 1540 struct sk_buff *skb = tcp_send_head(sk);
1534 unsigned int tso_segs, cwnd_quota; 1541 unsigned int tso_segs, cwnd_quota;
1535 1542
@@ -1544,17 +1551,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1544 BUG_ON(!tso_segs); 1551 BUG_ON(!tso_segs);
1545 1552
1546 limit = mss_now; 1553 limit = mss_now;
1547 if (tso_segs > 1) { 1554 if (tso_segs > 1)
1548 limit = tcp_window_allows(tp, skb, 1555 limit = tcp_mss_split_point(sk, skb, mss_now,
1549 mss_now, cwnd_quota); 1556 cwnd_quota);
1550
1551 if (skb->len < limit) {
1552 unsigned int trim = skb->len % mss_now;
1553
1554 if (trim)
1555 limit = skb->len - trim;
1556 }
1557 }
1558 1557
1559 if (skb->len > limit && 1558 if (skb->len > limit &&
1560 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1559 unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1564,7 +1563,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1564 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1563 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1565 1564
1566 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { 1565 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
1567 update_send_head(sk, skb); 1566 tcp_event_new_data_sent(sk, skb);
1568 tcp_cwnd_validate(sk); 1567 tcp_cwnd_validate(sk);
1569 return; 1568 return;
1570 } 1569 }
@@ -1641,11 +1640,12 @@ u32 __tcp_select_window(struct sock *sk)
1641 if (mss > full_space) 1640 if (mss > full_space)
1642 mss = full_space; 1641 mss = full_space;
1643 1642
1644 if (free_space < full_space/2) { 1643 if (free_space < (full_space >> 1)) {
1645 icsk->icsk_ack.quick = 0; 1644 icsk->icsk_ack.quick = 0;
1646 1645
1647 if (tcp_memory_pressure) 1646 if (tcp_memory_pressure)
1648 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); 1647 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1648 4U * tp->advmss);
1649 1649
1650 if (free_space < mss) 1650 if (free_space < mss)
1651 return 0; 1651 return 0;
@@ -1678,9 +1678,9 @@ u32 __tcp_select_window(struct sock *sk)
1678 * is too small. 1678 * is too small.
1679 */ 1679 */
1680 if (window <= free_space - mss || window > free_space) 1680 if (window <= free_space - mss || window > free_space)
1681 window = (free_space/mss)*mss; 1681 window = (free_space / mss) * mss;
1682 else if (mss == full_space && 1682 else if (mss == full_space &&
1683 free_space > window + full_space/2) 1683 free_space > window + (full_space >> 1))
1684 window = free_space; 1684 window = free_space;
1685 } 1685 }
1686 1686
@@ -1688,86 +1688,82 @@ u32 __tcp_select_window(struct sock *sk)
1688} 1688}
1689 1689
1690/* Attempt to collapse two adjacent SKB's during retransmission. */ 1690/* Attempt to collapse two adjacent SKB's during retransmission. */
1691static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) 1691static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
1692 int mss_now)
1692{ 1693{
1693 struct tcp_sock *tp = tcp_sk(sk); 1694 struct tcp_sock *tp = tcp_sk(sk);
1694 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 1695 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1696 int skb_size, next_skb_size;
1697 u16 flags;
1695 1698
1696 /* The first test we must make is that neither of these two 1699 /* The first test we must make is that neither of these two
1697 * SKB's are still referenced by someone else. 1700 * SKB's are still referenced by someone else.
1698 */ 1701 */
1699 if (!skb_cloned(skb) && !skb_cloned(next_skb)) { 1702 if (skb_cloned(skb) || skb_cloned(next_skb))
1700 int skb_size = skb->len, next_skb_size = next_skb->len; 1703 return;
1701 u16 flags = TCP_SKB_CB(skb)->flags;
1702 1704
1703 /* Also punt if next skb has been SACK'd. */ 1705 skb_size = skb->len;
1704 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) 1706 next_skb_size = next_skb->len;
1705 return; 1707 flags = TCP_SKB_CB(skb)->flags;
1706 1708
1707 /* Next skb is out of window. */ 1709 /* Also punt if next skb has been SACK'd. */
1708 if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) 1710 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
1709 return; 1711 return;
1710 1712
1711 /* Punt if not enough space exists in the first SKB for 1713 /* Next skb is out of window. */
1712 * the data in the second, or the total combined payload 1714 if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
1713 * would exceed the MSS. 1715 return;
1714 */
1715 if ((next_skb_size > skb_tailroom(skb)) ||
1716 ((skb_size + next_skb_size) > mss_now))
1717 return;
1718 1716
1719 BUG_ON(tcp_skb_pcount(skb) != 1 || 1717 /* Punt if not enough space exists in the first SKB for
1720 tcp_skb_pcount(next_skb) != 1); 1718 * the data in the second, or the total combined payload
1719 * would exceed the MSS.
1720 */
1721 if ((next_skb_size > skb_tailroom(skb)) ||
1722 ((skb_size + next_skb_size) > mss_now))
1723 return;
1721 1724
1722 if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && 1725 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1723 (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
1724 return;
1725 1726
1726 /* Ok. We will be able to collapse the packet. */ 1727 tcp_highest_sack_combine(sk, next_skb, skb);
1727 tcp_unlink_write_queue(next_skb, sk);
1728 1728
1729 skb_copy_from_linear_data(next_skb, 1729 /* Ok. We will be able to collapse the packet. */
1730 skb_put(skb, next_skb_size), 1730 tcp_unlink_write_queue(next_skb, sk);
1731 next_skb_size);
1732 1731
1733 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 1732 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
1734 skb->ip_summed = CHECKSUM_PARTIAL; 1733 next_skb_size);
1735 1734
1736 if (skb->ip_summed != CHECKSUM_PARTIAL) 1735 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1737 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); 1736 skb->ip_summed = CHECKSUM_PARTIAL;
1738 1737
1739 /* Update sequence range on original skb. */ 1738 if (skb->ip_summed != CHECKSUM_PARTIAL)
1740 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1739 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
1741 1740
1742 /* Merge over control information. */ 1741 /* Update sequence range on original skb. */
1743 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ 1742 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1744 TCP_SKB_CB(skb)->flags = flags;
1745 1743
1746 /* All done, get rid of second SKB and account for it so 1744 /* Merge over control information. */
1747 * packet counting does not break. 1745 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
1748 */ 1746 TCP_SKB_CB(skb)->flags = flags;
1749 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
1750 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
1751 tp->retrans_out -= tcp_skb_pcount(next_skb);
1752 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
1753 tp->lost_out -= tcp_skb_pcount(next_skb);
1754 /* Reno case is special. Sigh... */
1755 if (tcp_is_reno(tp) && tp->sacked_out)
1756 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1757
1758 tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb));
1759 tp->packets_out -= tcp_skb_pcount(next_skb);
1760
1761 /* changed transmit queue under us so clear hints */
1762 tcp_clear_retrans_hints_partial(tp);
1763 /* manually tune sacktag skb hint */
1764 if (tp->fastpath_skb_hint == next_skb) {
1765 tp->fastpath_skb_hint = skb;
1766 tp->fastpath_cnt_hint -= tcp_skb_pcount(skb);
1767 }
1768 1747
1769 sk_stream_free_skb(sk, next_skb); 1748 /* All done, get rid of second SKB and account for it so
1770 } 1749 * packet counting does not break.
1750 */
1751 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
1752 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
1753 tp->retrans_out -= tcp_skb_pcount(next_skb);
1754 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
1755 tp->lost_out -= tcp_skb_pcount(next_skb);
1756 /* Reno case is special. Sigh... */
1757 if (tcp_is_reno(tp) && tp->sacked_out)
1758 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1759
1760 tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
1761 tp->packets_out -= tcp_skb_pcount(next_skb);
1762
1763 /* changed transmit queue under us so clear hints */
1764 tcp_clear_retrans_hints_partial(tp);
1765
1766 sk_wmem_free_skb(sk, next_skb);
1771} 1767}
1772 1768
1773/* Do a simple retransmit without using the backoff mechanisms in 1769/* Do a simple retransmit without using the backoff mechanisms in
@@ -1786,12 +1782,12 @@ void tcp_simple_retransmit(struct sock *sk)
1786 if (skb == tcp_send_head(sk)) 1782 if (skb == tcp_send_head(sk))
1787 break; 1783 break;
1788 if (skb->len > mss && 1784 if (skb->len > mss &&
1789 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { 1785 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1790 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1786 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1791 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1787 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1792 tp->retrans_out -= tcp_skb_pcount(skb); 1788 tp->retrans_out -= tcp_skb_pcount(skb);
1793 } 1789 }
1794 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { 1790 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
1795 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1791 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1796 tp->lost_out += tcp_skb_pcount(skb); 1792 tp->lost_out += tcp_skb_pcount(skb);
1797 lost = 1; 1793 lost = 1;
@@ -1856,7 +1852,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1856 * case, when window is shrunk to zero. In this case 1852 * case, when window is shrunk to zero. In this case
1857 * our retransmit serves as a zero window probe. 1853 * our retransmit serves as a zero window probe.
1858 */ 1854 */
1859 if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) 1855 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
1860 && TCP_SKB_CB(skb)->seq != tp->snd_una) 1856 && TCP_SKB_CB(skb)->seq != tp->snd_una)
1861 return -EAGAIN; 1857 return -EAGAIN;
1862 1858
@@ -1870,8 +1866,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1870 (skb->len < (cur_mss >> 1)) && 1866 (skb->len < (cur_mss >> 1)) &&
1871 (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && 1867 (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
1872 (!tcp_skb_is_last(sk, skb)) && 1868 (!tcp_skb_is_last(sk, skb)) &&
1873 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && 1869 (skb_shinfo(skb)->nr_frags == 0 &&
1874 (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && 1870 skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
1871 (tcp_skb_pcount(skb) == 1 &&
1872 tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
1875 (sysctl_tcp_retrans_collapse != 0)) 1873 (sysctl_tcp_retrans_collapse != 0))
1876 tcp_retrans_try_collapse(sk, skb, cur_mss); 1874 tcp_retrans_try_collapse(sk, skb, cur_mss);
1877 1875
@@ -1886,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1886 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1884 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1887 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 1885 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
1888 if (!pskb_trim(skb, 0)) { 1886 if (!pskb_trim(skb, 0)) {
1889 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; 1887 /* Reuse, even though it does some unnecessary work */
1890 skb_shinfo(skb)->gso_segs = 1; 1888 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
1891 skb_shinfo(skb)->gso_size = 0; 1889 TCP_SKB_CB(skb)->flags);
1892 skb_shinfo(skb)->gso_type = 0;
1893 skb->ip_summed = CHECKSUM_NONE; 1890 skb->ip_summed = CHECKSUM_NONE;
1894 skb->csum = 0;
1895 } 1891 }
1896 } 1892 }
1897 1893
@@ -1909,7 +1905,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1909 tp->total_retrans++; 1905 tp->total_retrans++;
1910 1906
1911#if FASTRETRANS_DEBUG > 0 1907#if FASTRETRANS_DEBUG > 0
1912 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1908 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1913 if (net_ratelimit()) 1909 if (net_ratelimit())
1914 printk(KERN_DEBUG "retrans_out leaked.\n"); 1910 printk(KERN_DEBUG "retrans_out leaked.\n");
1915 } 1911 }
@@ -1951,7 +1947,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1951 if (tp->retransmit_skb_hint) { 1947 if (tp->retransmit_skb_hint) {
1952 skb = tp->retransmit_skb_hint; 1948 skb = tp->retransmit_skb_hint;
1953 packet_cnt = tp->retransmit_cnt_hint; 1949 packet_cnt = tp->retransmit_cnt_hint;
1954 }else{ 1950 } else {
1955 skb = tcp_write_queue_head(sk); 1951 skb = tcp_write_queue_head(sk);
1956 packet_cnt = 0; 1952 packet_cnt = 0;
1957 } 1953 }
@@ -1978,7 +1974,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1978 return; 1974 return;
1979 1975
1980 if (sacked & TCPCB_LOST) { 1976 if (sacked & TCPCB_LOST) {
1981 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1977 if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1982 if (tcp_retransmit_skb(sk, skb)) { 1978 if (tcp_retransmit_skb(sk, skb)) {
1983 tp->retransmit_skb_hint = NULL; 1979 tp->retransmit_skb_hint = NULL;
1984 return; 1980 return;
@@ -2036,7 +2032,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2036 break; 2032 break;
2037 tp->forward_skb_hint = skb; 2033 tp->forward_skb_hint = skb;
2038 2034
2039 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) 2035 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2040 break; 2036 break;
2041 2037
2042 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 2038 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -2060,7 +2056,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2060 } 2056 }
2061} 2057}
2062 2058
2063
2064/* Send a fin. The caller locks the socket for us. This cannot be 2059/* Send a fin. The caller locks the socket for us. This cannot be
2065 * allowed to fail queueing a FIN frame under any circumstances. 2060 * allowed to fail queueing a FIN frame under any circumstances.
2066 */ 2061 */
@@ -2091,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
2091 2086
2092 /* Reserve space for headers and prepare control bits. */ 2087 /* Reserve space for headers and prepare control bits. */
2093 skb_reserve(skb, MAX_TCP_HEADER); 2088 skb_reserve(skb, MAX_TCP_HEADER);
2094 skb->csum = 0;
2095 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
2096 TCP_SKB_CB(skb)->sacked = 0;
2097 skb_shinfo(skb)->gso_segs = 1;
2098 skb_shinfo(skb)->gso_size = 0;
2099 skb_shinfo(skb)->gso_type = 0;
2100
2101 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2089 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2102 TCP_SKB_CB(skb)->seq = tp->write_seq; 2090 tcp_init_nondata_skb(skb, tp->write_seq,
2103 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 2091 TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
2104 tcp_queue_skb(sk, skb); 2092 tcp_queue_skb(sk, skb);
2105 } 2093 }
2106 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2094 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2124,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2124 2112
2125 /* Reserve space for headers and prepare control bits. */ 2113 /* Reserve space for headers and prepare control bits. */
2126 skb_reserve(skb, MAX_TCP_HEADER); 2114 skb_reserve(skb, MAX_TCP_HEADER);
2127 skb->csum = 0; 2115 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2128 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); 2116 TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
2129 TCP_SKB_CB(skb)->sacked = 0;
2130 skb_shinfo(skb)->gso_segs = 1;
2131 skb_shinfo(skb)->gso_size = 0;
2132 skb_shinfo(skb)->gso_type = 0;
2133
2134 /* Send it off. */ 2117 /* Send it off. */
2135 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
2136 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
2137 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2118 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2138 if (tcp_transmit_skb(sk, skb, 0, priority)) 2119 if (tcp_transmit_skb(sk, skb, 0, priority))
2139 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2120 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2146,14 +2127,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2146 */ 2127 */
2147int tcp_send_synack(struct sock *sk) 2128int tcp_send_synack(struct sock *sk)
2148{ 2129{
2149 struct sk_buff* skb; 2130 struct sk_buff *skb;
2150 2131
2151 skb = tcp_write_queue_head(sk); 2132 skb = tcp_write_queue_head(sk);
2152 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { 2133 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
2153 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2134 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2154 return -EFAULT; 2135 return -EFAULT;
2155 } 2136 }
2156 if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { 2137 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
2157 if (skb_cloned(skb)) { 2138 if (skb_cloned(skb)) {
2158 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2139 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2159 if (nskb == NULL) 2140 if (nskb == NULL)
@@ -2161,8 +2142,9 @@ int tcp_send_synack(struct sock *sk)
2161 tcp_unlink_write_queue(skb, sk); 2142 tcp_unlink_write_queue(skb, sk);
2162 skb_header_release(nskb); 2143 skb_header_release(nskb);
2163 __tcp_add_write_queue_head(sk, nskb); 2144 __tcp_add_write_queue_head(sk, nskb);
2164 sk_stream_free_skb(sk, skb); 2145 sk_wmem_free_skb(sk, skb);
2165 sk_charge_skb(sk, nskb); 2146 sk->sk_wmem_queued += nskb->truesize;
2147 sk_mem_charge(sk, nskb->truesize);
2166 skb = nskb; 2148 skb = nskb;
2167 } 2149 }
2168 2150
@@ -2176,8 +2158,8 @@ int tcp_send_synack(struct sock *sk)
2176/* 2158/*
2177 * Prepare a SYN-ACK. 2159 * Prepare a SYN-ACK.
2178 */ 2160 */
2179struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2161struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2180 struct request_sock *req) 2162 struct request_sock *req)
2181{ 2163{
2182 struct inet_request_sock *ireq = inet_rsk(req); 2164 struct inet_request_sock *ireq = inet_rsk(req);
2183 struct tcp_sock *tp = tcp_sk(sk); 2165 struct tcp_sock *tp = tcp_sk(sk);
@@ -2220,12 +2202,11 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2220 TCP_ECN_make_synack(req, th); 2202 TCP_ECN_make_synack(req, th);
2221 th->source = inet_sk(sk)->sport; 2203 th->source = inet_sk(sk)->sport;
2222 th->dest = ireq->rmt_port; 2204 th->dest = ireq->rmt_port;
2223 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; 2205 /* Setting of flags are superfluous here for callers (and ECE is
2224 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 2206 * not even correctly set)
2225 TCP_SKB_CB(skb)->sacked = 0; 2207 */
2226 skb_shinfo(skb)->gso_segs = 1; 2208 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2227 skb_shinfo(skb)->gso_size = 0; 2209 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
2228 skb_shinfo(skb)->gso_type = 0;
2229 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2210 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2230 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); 2211 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2231 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 2212 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2257,7 +2238,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2257 NULL) 2238 NULL)
2258 ); 2239 );
2259 2240
2260 skb->csum = 0;
2261 th->doff = (tcp_header_size >> 2); 2241 th->doff = (tcp_header_size >> 2);
2262 TCP_INC_STATS(TCP_MIB_OUTSEGS); 2242 TCP_INC_STATS(TCP_MIB_OUTSEGS);
2263 2243
@@ -2349,23 +2329,17 @@ int tcp_connect(struct sock *sk)
2349 /* Reserve space for headers. */ 2329 /* Reserve space for headers. */
2350 skb_reserve(buff, MAX_TCP_HEADER); 2330 skb_reserve(buff, MAX_TCP_HEADER);
2351 2331
2352 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
2353 TCP_ECN_send_syn(sk, buff);
2354 TCP_SKB_CB(buff)->sacked = 0;
2355 skb_shinfo(buff)->gso_segs = 1;
2356 skb_shinfo(buff)->gso_size = 0;
2357 skb_shinfo(buff)->gso_type = 0;
2358 buff->csum = 0;
2359 tp->snd_nxt = tp->write_seq; 2332 tp->snd_nxt = tp->write_seq;
2360 TCP_SKB_CB(buff)->seq = tp->write_seq++; 2333 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
2361 TCP_SKB_CB(buff)->end_seq = tp->write_seq; 2334 TCP_ECN_send_syn(sk, buff);
2362 2335
2363 /* Send it off. */ 2336 /* Send it off. */
2364 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2337 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2365 tp->retrans_stamp = TCP_SKB_CB(buff)->when; 2338 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2366 skb_header_release(buff); 2339 skb_header_release(buff);
2367 __tcp_add_write_queue_tail(sk, buff); 2340 __tcp_add_write_queue_tail(sk, buff);
2368 sk_charge_skb(sk, buff); 2341 sk->sk_wmem_queued += buff->truesize;
2342 sk_mem_charge(sk, buff->truesize);
2369 tp->packets_out += tcp_skb_pcount(buff); 2343 tp->packets_out += tcp_skb_pcount(buff);
2370 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); 2344 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
2371 2345
@@ -2394,9 +2368,10 @@ void tcp_send_delayed_ack(struct sock *sk)
2394 2368
2395 if (ato > TCP_DELACK_MIN) { 2369 if (ato > TCP_DELACK_MIN) {
2396 const struct tcp_sock *tp = tcp_sk(sk); 2370 const struct tcp_sock *tp = tcp_sk(sk);
2397 int max_ato = HZ/2; 2371 int max_ato = HZ / 2;
2398 2372
2399 if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) 2373 if (icsk->icsk_ack.pingpong ||
2374 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2400 max_ato = TCP_DELACK_MAX; 2375 max_ato = TCP_DELACK_MAX;
2401 2376
2402 /* Slow path, intersegment interval is "high". */ 2377 /* Slow path, intersegment interval is "high". */
@@ -2406,7 +2381,7 @@ void tcp_send_delayed_ack(struct sock *sk)
2406 * directly. 2381 * directly.
2407 */ 2382 */
2408 if (tp->srtt) { 2383 if (tp->srtt) {
2409 int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); 2384 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2410 2385
2411 if (rtt < max_ato) 2386 if (rtt < max_ato)
2412 max_ato = rtt; 2387 max_ato = rtt;
@@ -2440,37 +2415,32 @@ void tcp_send_delayed_ack(struct sock *sk)
2440/* This routine sends an ack and also updates the window. */ 2415/* This routine sends an ack and also updates the window. */
2441void tcp_send_ack(struct sock *sk) 2416void tcp_send_ack(struct sock *sk)
2442{ 2417{
2443 /* If we have been reset, we may not send again. */ 2418 struct sk_buff *buff;
2444 if (sk->sk_state != TCP_CLOSE) {
2445 struct sk_buff *buff;
2446 2419
2447 /* We are not putting this on the write queue, so 2420 /* If we have been reset, we may not send again. */
2448 * tcp_transmit_skb() will set the ownership to this 2421 if (sk->sk_state == TCP_CLOSE)
2449 * sock. 2422 return;
2450 */
2451 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2452 if (buff == NULL) {
2453 inet_csk_schedule_ack(sk);
2454 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2455 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2456 TCP_DELACK_MAX, TCP_RTO_MAX);
2457 return;
2458 }
2459 2423
2460 /* Reserve space for headers and prepare control bits. */ 2424 /* We are not putting this on the write queue, so
2461 skb_reserve(buff, MAX_TCP_HEADER); 2425 * tcp_transmit_skb() will set the ownership to this
2462 buff->csum = 0; 2426 * sock.
2463 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; 2427 */
2464 TCP_SKB_CB(buff)->sacked = 0; 2428 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2465 skb_shinfo(buff)->gso_segs = 1; 2429 if (buff == NULL) {
2466 skb_shinfo(buff)->gso_size = 0; 2430 inet_csk_schedule_ack(sk);
2467 skb_shinfo(buff)->gso_type = 0; 2431 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2468 2432 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2469 /* Send it off, this clears delayed acks for us. */ 2433 TCP_DELACK_MAX, TCP_RTO_MAX);
2470 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk); 2434 return;
2471 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2472 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2473 } 2435 }
2436
2437 /* Reserve space for headers and prepare control bits. */
2438 skb_reserve(buff, MAX_TCP_HEADER);
2439 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
2440
2441 /* Send it off, this clears delayed acks for us. */
2442 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2443 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2474} 2444}
2475 2445
2476/* This routine sends a packet with an out of date sequence 2446/* This routine sends a packet with an out of date sequence
@@ -2496,66 +2466,57 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2496 2466
2497 /* Reserve space for headers and set control bits. */ 2467 /* Reserve space for headers and set control bits. */
2498 skb_reserve(skb, MAX_TCP_HEADER); 2468 skb_reserve(skb, MAX_TCP_HEADER);
2499 skb->csum = 0;
2500 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
2501 TCP_SKB_CB(skb)->sacked = urgent;
2502 skb_shinfo(skb)->gso_segs = 1;
2503 skb_shinfo(skb)->gso_size = 0;
2504 skb_shinfo(skb)->gso_type = 0;
2505
2506 /* Use a previous sequence. This should cause the other 2469 /* Use a previous sequence. This should cause the other
2507 * end to send an ack. Don't queue or clone SKB, just 2470 * end to send an ack. Don't queue or clone SKB, just
2508 * send it. 2471 * send it.
2509 */ 2472 */
2510 TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; 2473 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
2511 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
2512 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2474 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2513 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2475 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2514} 2476}
2515 2477
2516int tcp_write_wakeup(struct sock *sk) 2478int tcp_write_wakeup(struct sock *sk)
2517{ 2479{
2518 if (sk->sk_state != TCP_CLOSE) { 2480 struct tcp_sock *tp = tcp_sk(sk);
2519 struct tcp_sock *tp = tcp_sk(sk); 2481 struct sk_buff *skb;
2520 struct sk_buff *skb; 2482
2521 2483 if (sk->sk_state == TCP_CLOSE)
2522 if ((skb = tcp_send_head(sk)) != NULL && 2484 return -1;
2523 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { 2485
2524 int err; 2486 if ((skb = tcp_send_head(sk)) != NULL &&
2525 unsigned int mss = tcp_current_mss(sk, 0); 2487 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2526 unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; 2488 int err;
2527 2489 unsigned int mss = tcp_current_mss(sk, 0);
2528 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) 2490 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2529 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; 2491
2530 2492 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2531 /* We are probing the opening of a window 2493 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2532 * but the window size is != 0
2533 * must have been a result SWS avoidance ( sender )
2534 */
2535 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2536 skb->len > mss) {
2537 seg_size = min(seg_size, mss);
2538 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2539 if (tcp_fragment(sk, skb, seg_size, mss))
2540 return -1;
2541 } else if (!tcp_skb_pcount(skb))
2542 tcp_set_skb_tso_segs(sk, skb, mss);
2543 2494
2495 /* We are probing the opening of a window
2496 * but the window size is != 0
2497 * must have been a result SWS avoidance ( sender )
2498 */
2499 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2500 skb->len > mss) {
2501 seg_size = min(seg_size, mss);
2544 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2502 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2545 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2503 if (tcp_fragment(sk, skb, seg_size, mss))
2546 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2504 return -1;
2547 if (!err) { 2505 } else if (!tcp_skb_pcount(skb))
2548 update_send_head(sk, skb); 2506 tcp_set_skb_tso_segs(sk, skb, mss);
2549 } 2507
2550 return err; 2508 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2551 } else { 2509 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2552 if (tp->urg_mode && 2510 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2553 between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) 2511 if (!err)
2554 tcp_xmit_probe_skb(sk, TCPCB_URG); 2512 tcp_event_new_data_sent(sk, skb);
2555 return tcp_xmit_probe_skb(sk, 0); 2513 return err;
2556 } 2514 } else {
2515 if (tp->urg_mode &&
2516 between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
2517 tcp_xmit_probe_skb(sk, 1);
2518 return tcp_xmit_probe_skb(sk, 0);
2557 } 2519 }
2558 return -1;
2559} 2520}
2560 2521
2561/* A window probe timeout has occurred. If window is not closed send 2522/* A window probe timeout has occurred. If window is not closed send
@@ -2603,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect);
2603EXPORT_SYMBOL(tcp_make_synack); 2564EXPORT_SYMBOL(tcp_make_synack);
2604EXPORT_SYMBOL(tcp_simple_retransmit); 2565EXPORT_SYMBOL(tcp_simple_retransmit);
2605EXPORT_SYMBOL(tcp_sync_mss); 2566EXPORT_SYMBOL(tcp_sync_mss);
2606EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
2607EXPORT_SYMBOL(tcp_mtup_init); 2567EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index be27a33a1c68..2747ec7bfb63 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,8 +15,7 @@
15#define TCP_SCALABLE_AI_CNT 50U 15#define TCP_SCALABLE_AI_CNT 50U
16#define TCP_SCALABLE_MD_SCALE 3 16#define TCP_SCALABLE_MD_SCALE 3
17 17
18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, 18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
19 u32 in_flight, int flag)
20{ 19{
21 struct tcp_sock *tp = tcp_sk(sk); 20 struct tcp_sock *tp = tcp_sk(sk);
22 21
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d8970ecfcfc8..803d758a2b12 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -114,13 +114,31 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
114 return retries; 114 return retries;
115} 115}
116 116
117static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
118{
119 /* Black hole detection */
120 if (sysctl_tcp_mtu_probing) {
121 if (!icsk->icsk_mtup.enabled) {
122 icsk->icsk_mtup.enabled = 1;
123 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
124 } else {
125 struct tcp_sock *tp = tcp_sk(sk);
126 int mss;
127
128 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
129 mss = min(sysctl_tcp_base_mss, mss);
130 mss = max(mss, 68 - tp->tcp_header_len);
131 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
132 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
133 }
134 }
135}
136
117/* A write timeout has occurred. Process the after effects. */ 137/* A write timeout has occurred. Process the after effects. */
118static int tcp_write_timeout(struct sock *sk) 138static int tcp_write_timeout(struct sock *sk)
119{ 139{
120 struct inet_connection_sock *icsk = inet_csk(sk); 140 struct inet_connection_sock *icsk = inet_csk(sk);
121 struct tcp_sock *tp = tcp_sk(sk);
122 int retry_until; 141 int retry_until;
123 int mss;
124 142
125 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 143 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
126 if (icsk->icsk_retransmits) 144 if (icsk->icsk_retransmits)
@@ -129,18 +147,7 @@ static int tcp_write_timeout(struct sock *sk)
129 } else { 147 } else {
130 if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { 148 if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
131 /* Black hole detection */ 149 /* Black hole detection */
132 if (sysctl_tcp_mtu_probing) { 150 tcp_mtu_probing(icsk, sk);
133 if (!icsk->icsk_mtup.enabled) {
134 icsk->icsk_mtup.enabled = 1;
135 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
136 } else {
137 mss = min(sysctl_tcp_base_mss,
138 tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2);
139 mss = max(mss, 68 - tp->tcp_header_len);
140 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
141 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
142 }
143 }
144 151
145 dst_negative_advice(&sk->sk_dst_cache); 152 dst_negative_advice(&sk->sk_dst_cache);
146 } 153 }
@@ -179,7 +186,7 @@ static void tcp_delack_timer(unsigned long data)
179 goto out_unlock; 186 goto out_unlock;
180 } 187 }
181 188
182 sk_stream_mem_reclaim(sk); 189 sk_mem_reclaim_partial(sk);
183 190
184 if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) 191 if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
185 goto out; 192 goto out;
@@ -219,7 +226,7 @@ static void tcp_delack_timer(unsigned long data)
219 226
220out: 227out:
221 if (tcp_memory_pressure) 228 if (tcp_memory_pressure)
222 sk_stream_mem_reclaim(sk); 229 sk_mem_reclaim(sk);
223out_unlock: 230out_unlock:
224 bh_unlock_sock(sk); 231 bh_unlock_sock(sk);
225 sock_put(sk); 232 sock_put(sk);
@@ -413,7 +420,7 @@ static void tcp_write_timer(unsigned long data)
413 TCP_CHECK_TIMER(sk); 420 TCP_CHECK_TIMER(sk);
414 421
415out: 422out:
416 sk_stream_mem_reclaim(sk); 423 sk_mem_reclaim(sk);
417out_unlock: 424out_unlock:
418 bh_unlock_sock(sk); 425 bh_unlock_sock(sk);
419 sock_put(sk); 426 sock_put(sk);
@@ -507,7 +514,7 @@ static void tcp_keepalive_timer (unsigned long data)
507 } 514 }
508 515
509 TCP_CHECK_TIMER(sk); 516 TCP_CHECK_TIMER(sk);
510 sk_stream_mem_reclaim(sk); 517 sk_mem_reclaim(sk);
511 518
512resched: 519resched:
513 inet_csk_reset_keepalive_timer (sk, elapsed); 520 inet_csk_reset_keepalive_timer (sk, elapsed);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b49dedcda52d..be24d6ee34bd 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -162,14 +162,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
162} 162}
163EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); 163EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
164 164
165static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, 165static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
166 u32 in_flight, int flag)
167{ 166{
168 struct tcp_sock *tp = tcp_sk(sk); 167 struct tcp_sock *tp = tcp_sk(sk);
169 struct vegas *vegas = inet_csk_ca(sk); 168 struct vegas *vegas = inet_csk_ca(sk);
170 169
171 if (!vegas->doing_vegas_now) 170 if (!vegas->doing_vegas_now)
172 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 171 return tcp_reno_cong_avoid(sk, ack, in_flight);
173 172
174 /* The key players are v_beg_snd_una and v_beg_snd_nxt. 173 /* The key players are v_beg_snd_una and v_beg_snd_nxt.
175 * 174 *
@@ -228,7 +227,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
228 /* We don't have enough RTT samples to do the Vegas 227 /* We don't have enough RTT samples to do the Vegas
229 * calculation, so we'll behave like Reno. 228 * calculation, so we'll behave like Reno.
230 */ 229 */
231 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 230 tcp_reno_cong_avoid(sk, ack, in_flight);
232 } else { 231 } else {
233 u32 rtt, target_cwnd, diff; 232 u32 rtt, target_cwnd, diff;
234 233
@@ -266,26 +265,25 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
266 */ 265 */
267 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; 266 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
268 267
269 if (tp->snd_cwnd <= tp->snd_ssthresh) { 268 if (diff > gamma && tp->snd_ssthresh > 2 ) {
270 /* Slow start. */ 269 /* Going too fast. Time to slow down
271 if (diff > gamma) { 270 * and switch to congestion avoidance.
272 /* Going too fast. Time to slow down 271 */
273 * and switch to congestion avoidance. 272 tp->snd_ssthresh = 2;
274 */ 273
275 tp->snd_ssthresh = 2; 274 /* Set cwnd to match the actual rate
276 275 * exactly:
277 /* Set cwnd to match the actual rate 276 * cwnd = (actual rate) * baseRTT
278 * exactly: 277 * Then we add 1 because the integer
279 * cwnd = (actual rate) * baseRTT 278 * truncation robs us of full link
280 * Then we add 1 because the integer 279 * utilization.
281 * truncation robs us of full link 280 */
282 * utilization. 281 tp->snd_cwnd = min(tp->snd_cwnd,
283 */ 282 (target_cwnd >>
284 tp->snd_cwnd = min(tp->snd_cwnd, 283 V_PARAM_SHIFT)+1);
285 (target_cwnd >>
286 V_PARAM_SHIFT)+1);
287 284
288 } 285 } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
286 /* Slow start. */
289 tcp_slow_start(tp); 287 tcp_slow_start(tp);
290 } else { 288 } else {
291 /* Congestion avoidance. */ 289 /* Congestion avoidance. */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 8fb2aee0b1a4..d16689e98516 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,14 +114,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
114 tcp_veno_init(sk); 114 tcp_veno_init(sk);
115} 115}
116 116
117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, 117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
118 u32 in_flight, int flag)
119{ 118{
120 struct tcp_sock *tp = tcp_sk(sk); 119 struct tcp_sock *tp = tcp_sk(sk);
121 struct veno *veno = inet_csk_ca(sk); 120 struct veno *veno = inet_csk_ca(sk);
122 121
123 if (!veno->doing_veno_now) 122 if (!veno->doing_veno_now)
124 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 123 return tcp_reno_cong_avoid(sk, ack, in_flight);
125 124
126 /* limited by applications */ 125 /* limited by applications */
127 if (!tcp_is_cwnd_limited(sk, in_flight)) 126 if (!tcp_is_cwnd_limited(sk, in_flight))
@@ -132,7 +131,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack,
132 /* We don't have enough rtt samples to do the Veno 131 /* We don't have enough rtt samples to do the Veno
133 * calculation, so we'll behave like Reno. 132 * calculation, so we'll behave like Reno.
134 */ 133 */
135 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 134 tcp_reno_cong_avoid(sk, ack, in_flight);
136 } else { 135 } else {
137 u32 rtt, target_cwnd; 136 u32 rtt, target_cwnd;
138 137
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index c107fba7430e..e03b10183a8b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,8 +69,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); 69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
70} 70}
71 71
72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, 72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
73 u32 in_flight, int flag)
74{ 73{
75 struct tcp_sock *tp = tcp_sk(sk); 74 struct tcp_sock *tp = tcp_sk(sk);
76 struct yeah *yeah = inet_csk_ca(sk); 75 struct yeah *yeah = inet_csk_ca(sk);
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index a794a8ca8b4f..978b3fd61e65 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -17,6 +17,11 @@ static struct xfrm_tunnel *tunnel4_handlers;
17static struct xfrm_tunnel *tunnel64_handlers; 17static struct xfrm_tunnel *tunnel64_handlers;
18static DEFINE_MUTEX(tunnel4_mutex); 18static DEFINE_MUTEX(tunnel4_mutex);
19 19
20static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
21{
22 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
23}
24
20int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) 25int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
21{ 26{
22 struct xfrm_tunnel **pprev; 27 struct xfrm_tunnel **pprev;
@@ -25,8 +30,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
25 30
26 mutex_lock(&tunnel4_mutex); 31 mutex_lock(&tunnel4_mutex);
27 32
28 for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 33 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
29 *pprev; pprev = &(*pprev)->next) {
30 if ((*pprev)->priority > priority) 34 if ((*pprev)->priority > priority)
31 break; 35 break;
32 if ((*pprev)->priority == priority) 36 if ((*pprev)->priority == priority)
@@ -53,8 +57,7 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
53 57
54 mutex_lock(&tunnel4_mutex); 58 mutex_lock(&tunnel4_mutex);
55 59
56 for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 60 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
57 *pprev; pprev = &(*pprev)->next) {
58 if (*pprev == handler) { 61 if (*pprev == handler) {
59 *pprev = handler->next; 62 *pprev = handler->next;
60 ret = 0; 63 ret = 0;
@@ -118,6 +121,17 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
118 break; 121 break;
119} 122}
120 123
124#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
125static void tunnel64_err(struct sk_buff *skb, u32 info)
126{
127 struct xfrm_tunnel *handler;
128
129 for (handler = tunnel64_handlers; handler; handler = handler->next)
130 if (!handler->err_handler(skb, info))
131 break;
132}
133#endif
134
121static struct net_protocol tunnel4_protocol = { 135static struct net_protocol tunnel4_protocol = {
122 .handler = tunnel4_rcv, 136 .handler = tunnel4_rcv,
123 .err_handler = tunnel4_err, 137 .err_handler = tunnel4_err,
@@ -127,7 +141,7 @@ static struct net_protocol tunnel4_protocol = {
127#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 141#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
128static struct net_protocol tunnel64_protocol = { 142static struct net_protocol tunnel64_protocol = {
129 .handler = tunnel64_rcv, 143 .handler = tunnel64_rcv,
130 .err_handler = tunnel4_err, 144 .err_handler = tunnel64_err,
131 .no_policy = 1, 145 .no_policy = 1,
132}; 146};
133#endif 147#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4bc25b46f33f..7ea1b67b6de1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -82,6 +82,7 @@
82#include <asm/system.h> 82#include <asm/system.h>
83#include <asm/uaccess.h> 83#include <asm/uaccess.h>
84#include <asm/ioctls.h> 84#include <asm/ioctls.h>
85#include <linux/bootmem.h>
85#include <linux/types.h> 86#include <linux/types.h>
86#include <linux/fcntl.h> 87#include <linux/fcntl.h>
87#include <linux/module.h> 88#include <linux/module.h>
@@ -110,18 +111,33 @@
110 */ 111 */
111 112
112DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; 113DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
114EXPORT_SYMBOL(udp_statistics);
115
116DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
117EXPORT_SYMBOL(udp_stats_in6);
113 118
114struct hlist_head udp_hash[UDP_HTABLE_SIZE]; 119struct hlist_head udp_hash[UDP_HTABLE_SIZE];
115DEFINE_RWLOCK(udp_hash_lock); 120DEFINE_RWLOCK(udp_hash_lock);
116 121
117static inline int __udp_lib_lport_inuse(__u16 num, 122int sysctl_udp_mem[3] __read_mostly;
123int sysctl_udp_rmem_min __read_mostly;
124int sysctl_udp_wmem_min __read_mostly;
125
126EXPORT_SYMBOL(sysctl_udp_mem);
127EXPORT_SYMBOL(sysctl_udp_rmem_min);
128EXPORT_SYMBOL(sysctl_udp_wmem_min);
129
130atomic_t udp_memory_allocated;
131EXPORT_SYMBOL(udp_memory_allocated);
132
133static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
118 const struct hlist_head udptable[]) 134 const struct hlist_head udptable[])
119{ 135{
120 struct sock *sk; 136 struct sock *sk;
121 struct hlist_node *node; 137 struct hlist_node *node;
122 138
123 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
124 if (sk->sk_hash == num) 140 if (sk->sk_net == net && sk->sk_hash == num)
125 return 1; 141 return 1;
126 return 0; 142 return 0;
127} 143}
@@ -143,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
143 struct hlist_head *head; 159 struct hlist_head *head;
144 struct sock *sk2; 160 struct sock *sk2;
145 int error = 1; 161 int error = 1;
162 struct net *net = sk->sk_net;
146 163
147 write_lock_bh(&udp_hash_lock); 164 write_lock_bh(&udp_hash_lock);
148 165
@@ -182,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
182 /* 2nd pass: find hole in shortest hash chain */ 199 /* 2nd pass: find hole in shortest hash chain */
183 rover = best; 200 rover = best;
184 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { 201 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
185 if (! __udp_lib_lport_inuse(rover, udptable)) 202 if (! __udp_lib_lport_inuse(net, rover, udptable))
186 goto gotit; 203 goto gotit;
187 rover += UDP_HTABLE_SIZE; 204 rover += UDP_HTABLE_SIZE;
188 if (rover > high) 205 if (rover > high)
@@ -202,6 +219,7 @@ gotit:
202 sk_for_each(sk2, node, head) 219 sk_for_each(sk2, node, head)
203 if (sk2->sk_hash == snum && 220 if (sk2->sk_hash == snum &&
204 sk2 != sk && 221 sk2 != sk &&
222 sk2->sk_net == net &&
205 (!sk2->sk_reuse || !sk->sk_reuse) && 223 (!sk2->sk_reuse || !sk->sk_reuse) &&
206 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
207 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -214,7 +232,7 @@ gotit:
214 if (sk_unhashed(sk)) { 232 if (sk_unhashed(sk)) {
215 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 233 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
216 sk_add_node(sk, head); 234 sk_add_node(sk, head);
217 sock_prot_inc_use(sk->sk_prot); 235 sock_prot_inuse_add(sk->sk_prot, 1);
218 } 236 }
219 error = 0; 237 error = 0;
220fail: 238fail:
@@ -245,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
245/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
246 * harder than this. -DaveM 264 * harder than this. -DaveM
247 */ 265 */
248static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, 266static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
249 __be32 daddr, __be16 dport, 267 __be16 sport, __be32 daddr, __be16 dport,
250 int dif, struct hlist_head udptable[]) 268 int dif, struct hlist_head udptable[])
251{ 269{
252 struct sock *sk, *result = NULL; 270 struct sock *sk, *result = NULL;
253 struct hlist_node *node; 271 struct hlist_node *node;
@@ -258,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
258 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
259 struct inet_sock *inet = inet_sk(sk); 277 struct inet_sock *inet = inet_sk(sk);
260 278
261 if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { 279 if (sk->sk_net == net && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) {
262 int score = (sk->sk_family == PF_INET ? 1 : 0); 281 int score = (sk->sk_family == PF_INET ? 1 : 0);
263 if (inet->rcv_saddr) { 282 if (inet->rcv_saddr) {
264 if (inet->rcv_saddr != daddr) 283 if (inet->rcv_saddr != daddr)
@@ -345,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
345 int harderr; 364 int harderr;
346 int err; 365 int err;
347 366
348 sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, 367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
349 skb->dev->ifindex, udptable ); 368 iph->saddr, uh->source, skb->dev->ifindex, udptable);
350 if (sk == NULL) { 369 if (sk == NULL) {
351 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
352 return; /* No socket for error */ 371 return; /* No socket for error */
@@ -402,7 +421,7 @@ out:
402 421
403void udp_err(struct sk_buff *skb, u32 info) 422void udp_err(struct sk_buff *skb, u32 info)
404{ 423{
405 return __udp4_lib_err(skb, info, udp_hash); 424 __udp4_lib_err(skb, info, udp_hash);
406} 425}
407 426
408/* 427/*
@@ -471,6 +490,7 @@ static int udp_push_pending_frames(struct sock *sk)
471 struct sk_buff *skb; 490 struct sk_buff *skb;
472 struct udphdr *uh; 491 struct udphdr *uh;
473 int err = 0; 492 int err = 0;
493 int is_udplite = IS_UDPLITE(sk);
474 __wsum csum = 0; 494 __wsum csum = 0;
475 495
476 /* Grab the skbuff where UDP header space exists. */ 496 /* Grab the skbuff where UDP header space exists. */
@@ -486,7 +506,7 @@ static int udp_push_pending_frames(struct sock *sk)
486 uh->len = htons(up->len); 506 uh->len = htons(up->len);
487 uh->check = 0; 507 uh->check = 0;
488 508
489 if (up->pcflag) /* UDP-Lite */ 509 if (is_udplite) /* UDP-Lite */
490 csum = udplite_csum_outgoing(sk, skb); 510 csum = udplite_csum_outgoing(sk, skb);
491 511
492 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 512 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
@@ -514,7 +534,7 @@ out:
514 up->len = 0; 534 up->len = 0;
515 up->pending = 0; 535 up->pending = 0;
516 if (!err) 536 if (!err)
517 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); 537 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
518 return err; 538 return err;
519} 539}
520 540
@@ -531,7 +551,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
531 __be32 daddr, faddr, saddr; 551 __be32 daddr, faddr, saddr;
532 __be16 dport; 552 __be16 dport;
533 u8 tos; 553 u8 tos;
534 int err, is_udplite = up->pcflag; 554 int err, is_udplite = IS_UDPLITE(sk);
535 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 555 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
536 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 556 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
537 557
@@ -621,7 +641,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
621 connected = 0; 641 connected = 0;
622 } 642 }
623 643
624 if (MULTICAST(daddr)) { 644 if (ipv4_is_multicast(daddr)) {
625 if (!ipc.oif) 645 if (!ipc.oif)
626 ipc.oif = inet->mc_index; 646 ipc.oif = inet->mc_index;
627 if (!saddr) 647 if (!saddr)
@@ -643,7 +663,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
643 { .sport = inet->sport, 663 { .sport = inet->sport,
644 .dport = dport } } }; 664 .dport = dport } } };
645 security_sk_classify_flow(sk, &fl); 665 security_sk_classify_flow(sk, &fl);
646 err = ip_route_output_flow(&rt, &fl, sk, 1); 666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
647 if (err) { 667 if (err) {
648 if (err == -ENETUNREACH) 668 if (err == -ENETUNREACH)
649 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -825,6 +845,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
825 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 845 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
826 struct sk_buff *skb; 846 struct sk_buff *skb;
827 unsigned int ulen, copied; 847 unsigned int ulen, copied;
848 int peeked;
828 int err; 849 int err;
829 int is_udplite = IS_UDPLITE(sk); 850 int is_udplite = IS_UDPLITE(sk);
830 851
@@ -838,7 +859,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
838 return ip_recv_error(sk, msg, len); 859 return ip_recv_error(sk, msg, len);
839 860
840try_again: 861try_again:
841 skb = skb_recv_datagram(sk, flags, noblock, &err); 862 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
863 &peeked, &err);
842 if (!skb) 864 if (!skb)
843 goto out; 865 goto out;
844 866
@@ -873,6 +895,9 @@ try_again:
873 if (err) 895 if (err)
874 goto out_free; 896 goto out_free;
875 897
898 if (!peeked)
899 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
900
876 sock_recv_timestamp(msg, sk, skb); 901 sock_recv_timestamp(msg, sk, skb);
877 902
878 /* Copy the address. */ 903 /* Copy the address. */
@@ -891,14 +916,17 @@ try_again:
891 err = ulen; 916 err = ulen;
892 917
893out_free: 918out_free:
919 lock_sock(sk);
894 skb_free_datagram(sk, skb); 920 skb_free_datagram(sk, skb);
921 release_sock(sk);
895out: 922out:
896 return err; 923 return err;
897 924
898csum_copy_err: 925csum_copy_err:
899 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); 926 lock_sock(sk);
900 927 if (!skb_kill_datagram(sk, skb, flags))
901 skb_kill_datagram(sk, skb, flags); 928 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
929 release_sock(sk);
902 930
903 if (noblock) 931 if (noblock)
904 return -EAGAIN; 932 return -EAGAIN;
@@ -940,6 +968,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
940{ 968{
941 struct udp_sock *up = udp_sk(sk); 969 struct udp_sock *up = udp_sk(sk);
942 int rc; 970 int rc;
971 int is_udplite = IS_UDPLITE(sk);
943 972
944 /* 973 /*
945 * Charge it to the socket, dropping if the queue is full. 974 * Charge it to the socket, dropping if the queue is full.
@@ -967,7 +996,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
967 996
968 ret = (*up->encap_rcv)(sk, skb); 997 ret = (*up->encap_rcv)(sk, skb);
969 if (ret <= 0) { 998 if (ret <= 0) {
970 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); 999 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
1000 is_udplite);
971 return -ret; 1001 return -ret;
972 } 1002 }
973 } 1003 }
@@ -978,7 +1008,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
978 /* 1008 /*
979 * UDP-Lite specific tests, ignored on UDP sockets 1009 * UDP-Lite specific tests, ignored on UDP sockets
980 */ 1010 */
981 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 1011 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
982 1012
983 /* 1013 /*
984 * MIB statistics other than incrementing the error count are 1014 * MIB statistics other than incrementing the error count are
@@ -1019,15 +1049,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1019 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1049 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1020 /* Note that an ENOMEM error is charged twice */ 1050 /* Note that an ENOMEM error is charged twice */
1021 if (rc == -ENOMEM) 1051 if (rc == -ENOMEM)
1022 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); 1052 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
1023 goto drop; 1053 goto drop;
1024 } 1054 }
1025 1055
1026 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1027 return 0; 1056 return 0;
1028 1057
1029drop: 1058drop:
1030 UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); 1059 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1031 kfree_skb(skb); 1060 kfree_skb(skb);
1032 return -1; 1061 return -1;
1033} 1062}
@@ -1062,7 +1091,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1062 skb1 = skb_clone(skb, GFP_ATOMIC); 1091 skb1 = skb_clone(skb, GFP_ATOMIC);
1063 1092
1064 if (skb1) { 1093 if (skb1) {
1065 int ret = udp_queue_rcv_skb(sk, skb1); 1094 int ret = 0;
1095
1096 bh_lock_sock_nested(sk);
1097 if (!sock_owned_by_user(sk))
1098 ret = udp_queue_rcv_skb(sk, skb1);
1099 else
1100 sk_add_backlog(sk, skb1);
1101 bh_unlock_sock(sk);
1102
1066 if (ret > 0) 1103 if (ret > 0)
1067 /* we should probably re-process instead 1104 /* we should probably re-process instead
1068 * of dropping packets here. */ 1105 * of dropping packets here. */
@@ -1151,11 +1188,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1151 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1152 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1153 1190
1154 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, 1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
1155 inet_iif(skb), udptable); 1192 uh->dest, inet_iif(skb), udptable);
1156 1193
1157 if (sk != NULL) { 1194 if (sk != NULL) {
1158 int ret = udp_queue_rcv_skb(sk, skb); 1195 int ret = 0;
1196 bh_lock_sock_nested(sk);
1197 if (!sock_owned_by_user(sk))
1198 ret = udp_queue_rcv_skb(sk, skb);
1199 else
1200 sk_add_backlog(sk, skb);
1201 bh_unlock_sock(sk);
1159 sock_put(sk); 1202 sock_put(sk);
1160 1203
1161 /* a return value > 0 means to resubmit the input, but 1204 /* a return value > 0 means to resubmit the input, but
@@ -1236,6 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1236 struct udp_sock *up = udp_sk(sk); 1279 struct udp_sock *up = udp_sk(sk);
1237 int val; 1280 int val;
1238 int err = 0; 1281 int err = 0;
1282 int is_udplite = IS_UDPLITE(sk);
1239 1283
1240 if (optlen<sizeof(int)) 1284 if (optlen<sizeof(int))
1241 return -EINVAL; 1285 return -EINVAL;
@@ -1277,7 +1321,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1277 /* The sender sets actual checksum coverage length via this option. 1321 /* The sender sets actual checksum coverage length via this option.
1278 * The case coverage > packet length is handled by send module. */ 1322 * The case coverage > packet length is handled by send module. */
1279 case UDPLITE_SEND_CSCOV: 1323 case UDPLITE_SEND_CSCOV:
1280 if (!up->pcflag) /* Disable the option on UDP sockets */ 1324 if (!is_udplite) /* Disable the option on UDP sockets */
1281 return -ENOPROTOOPT; 1325 return -ENOPROTOOPT;
1282 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 1326 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1283 val = 8; 1327 val = 8;
@@ -1289,7 +1333,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1289 * sense, this should be set to at least 8 (as done below). If zero is 1333 * sense, this should be set to at least 8 (as done below). If zero is
1290 * used, this again means full checksum coverage. */ 1334 * used, this again means full checksum coverage. */
1291 case UDPLITE_RECV_CSCOV: 1335 case UDPLITE_RECV_CSCOV:
1292 if (!up->pcflag) /* Disable the option on UDP sockets */ 1336 if (!is_udplite) /* Disable the option on UDP sockets */
1293 return -ENOPROTOOPT; 1337 return -ENOPROTOOPT;
1294 if (val != 0 && val < 8) /* Avoid silly minimal values. */ 1338 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1295 val = 8; 1339 val = 8;
@@ -1430,6 +1474,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1430 1474
1431} 1475}
1432 1476
1477DEFINE_PROTO_INUSE(udp)
1478
1433struct proto udp_prot = { 1479struct proto udp_prot = {
1434 .name = "UDP", 1480 .name = "UDP",
1435 .owner = THIS_MODULE, 1481 .owner = THIS_MODULE,
@@ -1447,11 +1493,16 @@ struct proto udp_prot = {
1447 .hash = udp_lib_hash, 1493 .hash = udp_lib_hash,
1448 .unhash = udp_lib_unhash, 1494 .unhash = udp_lib_unhash,
1449 .get_port = udp_v4_get_port, 1495 .get_port = udp_v4_get_port,
1496 .memory_allocated = &udp_memory_allocated,
1497 .sysctl_mem = sysctl_udp_mem,
1498 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min,
1450 .obj_size = sizeof(struct udp_sock), 1500 .obj_size = sizeof(struct udp_sock),
1451#ifdef CONFIG_COMPAT 1501#ifdef CONFIG_COMPAT
1452 .compat_setsockopt = compat_udp_setsockopt, 1502 .compat_setsockopt = compat_udp_setsockopt,
1453 .compat_getsockopt = compat_udp_getsockopt, 1503 .compat_getsockopt = compat_udp_getsockopt,
1454#endif 1504#endif
1505 REF_PROTO_INUSE(udp)
1455}; 1506};
1456 1507
1457/* ------------------------------------------------------------------------ */ 1508/* ------------------------------------------------------------------------ */
@@ -1502,6 +1553,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1502} 1553}
1503 1554
1504static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1555static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1556 __acquires(udp_hash_lock)
1505{ 1557{
1506 read_lock(&udp_hash_lock); 1558 read_lock(&udp_hash_lock);
1507 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1; 1559 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
@@ -1521,6 +1573,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1521} 1573}
1522 1574
1523static void udp_seq_stop(struct seq_file *seq, void *v) 1575static void udp_seq_stop(struct seq_file *seq, void *v)
1576 __releases(udp_hash_lock)
1524{ 1577{
1525 read_unlock(&udp_hash_lock); 1578 read_unlock(&udp_hash_lock);
1526} 1579}
@@ -1641,6 +1694,25 @@ void udp4_proc_exit(void)
1641} 1694}
1642#endif /* CONFIG_PROC_FS */ 1695#endif /* CONFIG_PROC_FS */
1643 1696
1697void __init udp_init(void)
1698{
1699 unsigned long limit;
1700
1701 /* Set the pressure threshold up by the same strategy of TCP. It is a
1702 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1703 * toward zero with the amount of memory, with a floor of 128 pages.
1704 */
1705 limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1706 limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1707 limit = max(limit, 128UL);
1708 sysctl_udp_mem[0] = limit / 4 * 3;
1709 sysctl_udp_mem[1] = limit;
1710 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
1711
1712 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
1713 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1714}
1715
1644EXPORT_SYMBOL(udp_disconnect); 1716EXPORT_SYMBOL(udp_disconnect);
1645EXPORT_SYMBOL(udp_hash); 1717EXPORT_SYMBOL(udp_hash);
1646EXPORT_SYMBOL(udp_hash_lock); 1718EXPORT_SYMBOL(udp_hash_lock);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 94977205abb4..001b881ca36f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -35,7 +35,7 @@ static int udplite_rcv(struct sk_buff *skb)
35 35
36static void udplite_err(struct sk_buff *skb, u32 info) 36static void udplite_err(struct sk_buff *skb, u32 info)
37{ 37{
38 return __udp4_lib_err(skb, info, udplite_hash); 38 __udp4_lib_err(skb, info, udplite_hash);
39} 39}
40 40
41static struct net_protocol udplite_protocol = { 41static struct net_protocol udplite_protocol = {
@@ -44,6 +44,8 @@ static struct net_protocol udplite_protocol = {
44 .no_policy = 1, 44 .no_policy = 1,
45}; 45};
46 46
47DEFINE_PROTO_INUSE(udplite)
48
47struct proto udplite_prot = { 49struct proto udplite_prot = {
48 .name = "UDP-Lite", 50 .name = "UDP-Lite",
49 .owner = THIS_MODULE, 51 .owner = THIS_MODULE,
@@ -67,6 +69,7 @@ struct proto udplite_prot = {
67 .compat_setsockopt = compat_udp_setsockopt, 69 .compat_setsockopt = compat_udp_setsockopt,
68 .compat_getsockopt = compat_udp_getsockopt, 70 .compat_getsockopt = compat_udp_getsockopt,
69#endif 71#endif
72 REF_PROTO_INUSE(udplite)
70}; 73};
71 74
72static struct inet_protosw udplite4_protosw = { 75static struct inet_protosw udplite4_protosw = {
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5e95c8a07efb..390dcb1354a5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,7 +16,11 @@
16#include <net/ip.h> 16#include <net/ip.h>
17#include <net/xfrm.h> 17#include <net/xfrm.h>
18 18
19#ifdef CONFIG_NETFILTER 19int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
20{
21 return xfrm4_extract_header(skb);
22}
23
20static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) 24static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
21{ 25{
22 if (skb->dst == NULL) { 26 if (skb->dst == NULL) {
@@ -31,129 +35,35 @@ drop:
31 kfree_skb(skb); 35 kfree_skb(skb);
32 return NET_RX_DROP; 36 return NET_RX_DROP;
33} 37}
34#endif
35 38
36int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, 39int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
37 int encap_type) 40 int encap_type)
38{ 41{
39 int err; 42 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
40 __be32 seq; 43 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
41 struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; 44 return xfrm_input(skb, nexthdr, spi, encap_type);
42 struct xfrm_state *x; 45}
43 int xfrm_nr = 0; 46EXPORT_SYMBOL(xfrm4_rcv_encap);
44 int decaps = 0;
45 unsigned int nhoff = offsetof(struct iphdr, protocol);
46
47 seq = 0;
48 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
49 goto drop;
50
51 do {
52 const struct iphdr *iph = ip_hdr(skb);
53
54 if (xfrm_nr == XFRM_MAX_DEPTH)
55 goto drop;
56
57 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
58 nexthdr, AF_INET);
59 if (x == NULL)
60 goto drop;
61
62 spin_lock(&x->lock);
63 if (unlikely(x->km.state != XFRM_STATE_VALID))
64 goto drop_unlock;
65
66 if ((x->encap ? x->encap->encap_type : 0) != encap_type)
67 goto drop_unlock;
68
69 if (x->props.replay_window && xfrm_replay_check(x, seq))
70 goto drop_unlock;
71
72 if (xfrm_state_check_expire(x))
73 goto drop_unlock;
74
75 nexthdr = x->type->input(x, skb);
76 if (nexthdr <= 0)
77 goto drop_unlock;
78
79 skb_network_header(skb)[nhoff] = nexthdr;
80
81 /* only the first xfrm gets the encap type */
82 encap_type = 0;
83
84 if (x->props.replay_window)
85 xfrm_replay_advance(x, seq);
86
87 x->curlft.bytes += skb->len;
88 x->curlft.packets++;
89
90 spin_unlock(&x->lock);
91
92 xfrm_vec[xfrm_nr++] = x;
93
94 if (x->outer_mode->input(x, skb))
95 goto drop;
96
97 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
98 decaps = 1;
99 break;
100 }
101
102 err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
103 if (err < 0)
104 goto drop;
105 } while (!err);
106
107 /* Allocate new secpath or COW existing one. */
108
109 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
110 struct sec_path *sp;
111 sp = secpath_dup(skb->sp);
112 if (!sp)
113 goto drop;
114 if (skb->sp)
115 secpath_put(skb->sp);
116 skb->sp = sp;
117 }
118 if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
119 goto drop;
120
121 memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
122 xfrm_nr * sizeof(xfrm_vec[0]));
123 skb->sp->len += xfrm_nr;
124 47
125 nf_reset(skb); 48int xfrm4_transport_finish(struct sk_buff *skb, int async)
49{
50 struct iphdr *iph = ip_hdr(skb);
126 51
127 if (decaps) { 52 iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
128 dst_release(skb->dst);
129 skb->dst = NULL;
130 netif_rx(skb);
131 return 0;
132 } else {
133#ifdef CONFIG_NETFILTER
134 __skb_push(skb, skb->data - skb_network_header(skb));
135 ip_hdr(skb)->tot_len = htons(skb->len);
136 ip_send_check(ip_hdr(skb));
137 53
138 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, 54#ifndef CONFIG_NETFILTER
139 xfrm4_rcv_encap_finish); 55 if (!async)
140 return 0; 56 return -iph->protocol;
141#else
142 return -ip_hdr(skb)->protocol;
143#endif 57#endif
144 }
145 58
146drop_unlock: 59 __skb_push(skb, skb->data - skb_network_header(skb));
147 spin_unlock(&x->lock); 60 iph->tot_len = htons(skb->len);
148 xfrm_state_put(x); 61 ip_send_check(iph);
149drop:
150 while (--xfrm_nr >= 0)
151 xfrm_state_put(xfrm_vec[xfrm_nr]);
152 62
153 kfree_skb(skb); 63 NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
64 xfrm4_rcv_encap_finish);
154 return 0; 65 return 0;
155} 66}
156EXPORT_SYMBOL(xfrm4_rcv_encap);
157 67
158/* If it's a keepalive packet, then just eat it. 68/* If it's a keepalive packet, then just eat it.
159 * If it's an encapsulated packet, then pass it to the 69 * If it's an encapsulated packet, then pass it to the
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e42e122414be..e093a7b59e18 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -17,6 +17,21 @@
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/xfrm.h> 18#include <net/xfrm.h>
19 19
20static void xfrm4_beet_make_header(struct sk_buff *skb)
21{
22 struct iphdr *iph = ip_hdr(skb);
23
24 iph->ihl = 5;
25 iph->version = 4;
26
27 iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
28 iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
29
30 iph->id = XFRM_MODE_SKB_CB(skb)->id;
31 iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
32 iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
33}
34
20/* Add encapsulation header. 35/* Add encapsulation header.
21 * 36 *
22 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. 37 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
@@ -40,10 +55,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
40 offsetof(struct iphdr, protocol); 55 offsetof(struct iphdr, protocol);
41 skb->transport_header = skb->network_header + sizeof(*iph); 56 skb->transport_header = skb->network_header + sizeof(*iph);
42 57
58 xfrm4_beet_make_header(skb);
59
43 ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); 60 ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen);
44 61
45 top_iph = ip_hdr(skb); 62 top_iph = ip_hdr(skb);
46 memmove(top_iph, iph, sizeof(*iph)); 63
47 if (unlikely(optlen)) { 64 if (unlikely(optlen)) {
48 BUG_ON(optlen < 0); 65 BUG_ON(optlen < 0);
49 66
@@ -65,43 +82,46 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
65 82
66static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) 83static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
67{ 84{
68 struct iphdr *iph = ip_hdr(skb); 85 struct iphdr *iph;
69 int phlen = 0;
70 int optlen = 0; 86 int optlen = 0;
71 u8 ph_nexthdr = 0;
72 int err = -EINVAL; 87 int err = -EINVAL;
73 88
74 if (unlikely(iph->protocol == IPPROTO_BEETPH)) { 89 if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
75 struct ip_beet_phdr *ph; 90 struct ip_beet_phdr *ph;
91 int phlen;
76 92
77 if (!pskb_may_pull(skb, sizeof(*ph))) 93 if (!pskb_may_pull(skb, sizeof(*ph)))
78 goto out; 94 goto out;
79 ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1); 95
96 ph = (struct ip_beet_phdr *)skb->data;
80 97
81 phlen = sizeof(*ph) + ph->padlen; 98 phlen = sizeof(*ph) + ph->padlen;
82 optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); 99 optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
83 if (optlen < 0 || optlen & 3 || optlen > 250) 100 if (optlen < 0 || optlen & 3 || optlen > 250)
84 goto out; 101 goto out;
85 102
86 if (!pskb_may_pull(skb, phlen + optlen)) 103 XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
87 goto out;
88 skb->len -= phlen + optlen;
89 104
 90 ph_nexthdr = ph->nexthdr; 105 if (!pskb_may_pull(skb, phlen))
 106 goto out;
107 __skb_pull(skb, phlen);
91 } 108 }
92 109
93 skb_set_network_header(skb, phlen - sizeof(*iph)); 110 skb_push(skb, sizeof(*iph));
94 memmove(skb_network_header(skb), iph, sizeof(*iph)); 111 skb_reset_network_header(skb);
95 skb_set_transport_header(skb, phlen + optlen); 112
96 skb->data = skb_transport_header(skb); 113 memmove(skb->data - skb->mac_len, skb_mac_header(skb),
114 skb->mac_len);
115 skb_set_mac_header(skb, -skb->mac_len);
116
117 xfrm4_beet_make_header(skb);
97 118
98 iph = ip_hdr(skb); 119 iph = ip_hdr(skb);
99 iph->ihl = (sizeof(*iph) + optlen) / 4; 120
100 iph->tot_len = htons(skb->len + iph->ihl * 4); 121 iph->ihl += optlen / 4;
122 iph->tot_len = htons(skb->len);
101 iph->daddr = x->sel.daddr.a4; 123 iph->daddr = x->sel.daddr.a4;
102 iph->saddr = x->sel.saddr.a4; 124 iph->saddr = x->sel.saddr.a4;
103 if (ph_nexthdr)
104 iph->protocol = ph_nexthdr;
105 iph->check = 0; 125 iph->check = 0;
106 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); 126 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
107 err = 0; 127 err = 0;
@@ -110,8 +130,10 @@ out:
110} 130}
111 131
112static struct xfrm_mode xfrm4_beet_mode = { 132static struct xfrm_mode xfrm4_beet_mode = {
113 .input = xfrm4_beet_input, 133 .input2 = xfrm4_beet_input,
114 .output = xfrm4_beet_output, 134 .input = xfrm_prepare_input,
135 .output2 = xfrm4_beet_output,
136 .output = xfrm4_prepare_output,
115 .owner = THIS_MODULE, 137 .owner = THIS_MODULE,
116 .encap = XFRM_MODE_BEET, 138 .encap = XFRM_MODE_BEET,
117 .flags = XFRM_MODE_FLAG_TUNNEL, 139 .flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e4deecba6dd2..8dee617ee900 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,92 +16,60 @@
16 16
17static inline void ipip_ecn_decapsulate(struct sk_buff *skb) 17static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
18{ 18{
19 struct iphdr *outer_iph = ip_hdr(skb);
20 struct iphdr *inner_iph = ipip_hdr(skb); 19 struct iphdr *inner_iph = ipip_hdr(skb);
21 20
22 if (INET_ECN_is_ce(outer_iph->tos)) 21 if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
23 IP_ECN_set_ce(inner_iph); 22 IP_ECN_set_ce(inner_iph);
24} 23}
25 24
26static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
27{
28 if (INET_ECN_is_ce(iph->tos))
29 IP6_ECN_set_ce(ipv6_hdr(skb));
30}
31
32/* Add encapsulation header. 25/* Add encapsulation header.
33 * 26 *
34 * The top IP header will be constructed per RFC 2401. 27 * The top IP header will be constructed per RFC 2401.
35 */ 28 */
36static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 29static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
37{ 30{
38 struct dst_entry *dst = skb->dst; 31 struct dst_entry *dst = skb->dst;
39 struct xfrm_dst *xdst = (struct xfrm_dst*)dst; 32 struct iphdr *top_iph;
40 struct iphdr *iph, *top_iph;
41 int flags; 33 int flags;
42 34
43 iph = ip_hdr(skb);
44
45 skb_set_network_header(skb, -x->props.header_len); 35 skb_set_network_header(skb, -x->props.header_len);
46 skb->mac_header = skb->network_header + 36 skb->mac_header = skb->network_header +
47 offsetof(struct iphdr, protocol); 37 offsetof(struct iphdr, protocol);
48 skb->transport_header = skb->network_header + sizeof(*iph); 38 skb->transport_header = skb->network_header + sizeof(*top_iph);
49 top_iph = ip_hdr(skb); 39 top_iph = ip_hdr(skb);
50 40
51 top_iph->ihl = 5; 41 top_iph->ihl = 5;
52 top_iph->version = 4; 42 top_iph->version = 4;
53 43
54 flags = x->props.flags; 44 top_iph->protocol = x->inner_mode->afinfo->proto;
55 45
56 /* DS disclosed */ 46 /* DS disclosed */
57 if (xdst->route->ops->family == AF_INET) { 47 top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
58 top_iph->protocol = IPPROTO_IPIP; 48 XFRM_MODE_SKB_CB(skb)->tos);
59 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
60 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
61 0 : (iph->frag_off & htons(IP_DF));
62 }
63#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
64 else {
65 struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph;
66 top_iph->protocol = IPPROTO_IPV6;
67 top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h));
68 top_iph->frag_off = 0;
69 }
70#endif
71 49
50 flags = x->props.flags;
72 if (flags & XFRM_STATE_NOECN) 51 if (flags & XFRM_STATE_NOECN)
73 IP_ECN_clear(top_iph); 52 IP_ECN_clear(top_iph);
74 53
75 if (!top_iph->frag_off) 54 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
76 __ip_select_ident(top_iph, dst->child, 0); 55 0 : XFRM_MODE_SKB_CB(skb)->frag_off;
56 ip_select_ident(top_iph, dst->child, NULL);
77 57
78 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); 58 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
79 59
80 top_iph->saddr = x->props.saddr.a4; 60 top_iph->saddr = x->props.saddr.a4;
81 top_iph->daddr = x->id.daddr.a4; 61 top_iph->daddr = x->id.daddr.a4;
82 62
83 skb->protocol = htons(ETH_P_IP);
84
85 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
86 return 0; 63 return 0;
87} 64}
88 65
89static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 66static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
90{ 67{
91 struct iphdr *iph = ip_hdr(skb);
92 const unsigned char *old_mac; 68 const unsigned char *old_mac;
93 int err = -EINVAL; 69 int err = -EINVAL;
94 70
95 switch (iph->protocol){ 71 if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
96 case IPPROTO_IPIP: 72 goto out;
97 break;
98#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
99 case IPPROTO_IPV6:
100 break;
101#endif
102 default:
103 goto out;
104 }
105 73
106 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 74 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
107 goto out; 75 goto out;
@@ -110,20 +78,11 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
110 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 78 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
111 goto out; 79 goto out;
112 80
113 iph = ip_hdr(skb); 81 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
114 if (iph->protocol == IPPROTO_IPIP) { 82 ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
115 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 83 if (!(x->props.flags & XFRM_STATE_NOECN))
116 ipv4_copy_dscp(iph, ipip_hdr(skb)); 84 ipip_ecn_decapsulate(skb);
117 if (!(x->props.flags & XFRM_STATE_NOECN)) 85
118 ipip_ecn_decapsulate(skb);
119 }
120#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
121 else {
122 if (!(x->props.flags & XFRM_STATE_NOECN))
123 ipip6_ecn_decapsulate(iph, skb);
124 skb->protocol = htons(ETH_P_IPV6);
125 }
126#endif
127 old_mac = skb_mac_header(skb); 86 old_mac = skb_mac_header(skb);
128 skb_set_mac_header(skb, -skb->mac_len); 87 skb_set_mac_header(skb, -skb->mac_len);
129 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 88 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -135,19 +94,21 @@ out:
135} 94}
136 95
137static struct xfrm_mode xfrm4_tunnel_mode = { 96static struct xfrm_mode xfrm4_tunnel_mode = {
138 .input = xfrm4_tunnel_input, 97 .input2 = xfrm4_mode_tunnel_input,
139 .output = xfrm4_tunnel_output, 98 .input = xfrm_prepare_input,
99 .output2 = xfrm4_mode_tunnel_output,
100 .output = xfrm4_prepare_output,
140 .owner = THIS_MODULE, 101 .owner = THIS_MODULE,
141 .encap = XFRM_MODE_TUNNEL, 102 .encap = XFRM_MODE_TUNNEL,
142 .flags = XFRM_MODE_FLAG_TUNNEL, 103 .flags = XFRM_MODE_FLAG_TUNNEL,
143}; 104};
144 105
145static int __init xfrm4_tunnel_init(void) 106static int __init xfrm4_mode_tunnel_init(void)
146{ 107{
147 return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET); 108 return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
148} 109}
149 110
150static void __exit xfrm4_tunnel_exit(void) 111static void __exit xfrm4_mode_tunnel_exit(void)
151{ 112{
152 int err; 113 int err;
153 114
@@ -155,7 +116,7 @@ static void __exit xfrm4_tunnel_exit(void)
155 BUG_ON(err); 116 BUG_ON(err);
156} 117}
157 118
158module_init(xfrm4_tunnel_init); 119module_init(xfrm4_mode_tunnel_init);
159module_exit(xfrm4_tunnel_exit); 120module_exit(xfrm4_mode_tunnel_exit);
160MODULE_LICENSE("GPL"); 121MODULE_LICENSE("GPL");
161MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); 122MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index c4a7156962bd..d5a58a818021 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,11 +8,12 @@
8 * 2 of the License, or (at your option) any later version. 8 * 2 of the License, or (at your option) any later version.
9 */ 9 */
10 10
11#include <linux/compiler.h>
12#include <linux/if_ether.h> 11#include <linux/if_ether.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter_ipv4.h> 15#include <linux/netfilter_ipv4.h>
16#include <net/dst.h>
16#include <net/ip.h> 17#include <net/ip.h>
17#include <net/xfrm.h> 18#include <net/xfrm.h>
18#include <net/icmp.h> 19#include <net/icmp.h>
@@ -25,8 +26,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
25 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) 26 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
26 goto out; 27 goto out;
27 28
28 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
29
30 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) 29 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
31 goto out; 30 goto out;
32 31
@@ -40,106 +39,54 @@ out:
40 return ret; 39 return ret;
41} 40}
42 41
43static inline int xfrm4_output_one(struct sk_buff *skb) 42int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
44{ 43{
45 struct dst_entry *dst = skb->dst;
46 struct xfrm_state *x = dst->xfrm;
47 struct iphdr *iph;
48 int err; 44 int err;
49 45
50 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { 46 err = xfrm4_tunnel_check_size(skb);
51 err = xfrm4_tunnel_check_size(skb);
52 if (err)
53 goto error_nolock;
54 }
55
56 err = xfrm_output(skb);
57 if (err) 47 if (err)
58 goto error_nolock; 48 return err;
59 49
60 iph = ip_hdr(skb); 50 XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
61 iph->tot_len = htons(skb->len);
62 ip_send_check(iph);
63 51
64 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; 52 return xfrm4_extract_header(skb);
65 err = 0;
66
67out_exit:
68 return err;
69error_nolock:
70 kfree_skb(skb);
71 goto out_exit;
72} 53}
73 54
74static int xfrm4_output_finish2(struct sk_buff *skb) 55int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
75{ 56{
76 int err; 57 int err;
77 58
78 while (likely((err = xfrm4_output_one(skb)) == 0)) { 59 err = x->inner_mode->afinfo->extract_output(x, skb);
79 nf_reset(skb); 60 if (err)
80 61 return err;
81 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
82 skb->dst->dev, dst_output);
83 if (unlikely(err != 1))
84 break;
85 62
86 if (!skb->dst->xfrm) 63 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
87 return dst_output(skb); 64 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
88 65
89 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, 66 skb->protocol = htons(ETH_P_IP);
90 skb->dst->dev, xfrm4_output_finish2);
91 if (unlikely(err != 1))
92 break;
93 }
94 67
95 return err; 68 return x->outer_mode->output2(x, skb);
96} 69}
70EXPORT_SYMBOL(xfrm4_prepare_output);
97 71
98static int xfrm4_output_finish(struct sk_buff *skb) 72static int xfrm4_output_finish(struct sk_buff *skb)
99{ 73{
100 struct sk_buff *segs;
101
102#ifdef CONFIG_NETFILTER 74#ifdef CONFIG_NETFILTER
103 if (!skb->dst->xfrm) { 75 if (!skb->dst->xfrm) {
104 IPCB(skb)->flags |= IPSKB_REROUTED; 76 IPCB(skb)->flags |= IPSKB_REROUTED;
105 return dst_output(skb); 77 return dst_output(skb);
106 } 78 }
107#endif
108 79
109 if (!skb_is_gso(skb)) 80 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
110 return xfrm4_output_finish2(skb); 81#endif
111 82
112 skb->protocol = htons(ETH_P_IP); 83 skb->protocol = htons(ETH_P_IP);
113 segs = skb_gso_segment(skb, 0); 84 return xfrm_output(skb);
114 kfree_skb(skb);
115 if (unlikely(IS_ERR(segs)))
116 return PTR_ERR(segs);
117
118 do {
119 struct sk_buff *nskb = segs->next;
120 int err;
121
122 segs->next = NULL;
123 err = xfrm4_output_finish2(segs);
124
125 if (unlikely(err)) {
126 while ((segs = nskb)) {
127 nskb = segs->next;
128 segs->next = NULL;
129 kfree_skb(segs);
130 }
131 return err;
132 }
133
134 segs = nskb;
135 } while (segs);
136
137 return 0;
138} 85}
139 86
140int xfrm4_output(struct sk_buff *skb) 87int xfrm4_output(struct sk_buff *skb)
141{ 88{
142 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, 89 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
143 xfrm4_output_finish, 90 NULL, skb->dst->dev, xfrm4_output_finish,
144 !(IPCB(skb)->flags & IPSKB_REROUTED)); 91 !(IPCB(skb)->flags & IPSKB_REROUTED));
145} 92}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cc86fb110dd8..10ed70491434 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -8,36 +8,54 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/compiler.h> 11#include <linux/err.h>
12#include <linux/kernel.h>
12#include <linux/inetdevice.h> 13#include <linux/inetdevice.h>
14#include <net/dst.h>
13#include <net/xfrm.h> 15#include <net/xfrm.h>
14#include <net/ip.h> 16#include <net/ip.h>
15 17
16static struct dst_ops xfrm4_dst_ops; 18static struct dst_ops xfrm4_dst_ops;
17static struct xfrm_policy_afinfo xfrm4_policy_afinfo; 19static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
18 20
19static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) 21static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
22 xfrm_address_t *daddr)
20{ 23{
21 return __ip_route_output_key((struct rtable**)dst, fl); 24 struct flowi fl = {
22}
23
24static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
25{
26 struct rtable *rt;
27 struct flowi fl_tunnel = {
28 .nl_u = { 25 .nl_u = {
29 .ip4_u = { 26 .ip4_u = {
27 .tos = tos,
30 .daddr = daddr->a4, 28 .daddr = daddr->a4,
31 }, 29 },
32 }, 30 },
33 }; 31 };
32 struct dst_entry *dst;
33 struct rtable *rt;
34 int err;
34 35
35 if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { 36 if (saddr)
36 saddr->a4 = rt->rt_src; 37 fl.fl4_src = saddr->a4;
37 dst_release(&rt->u.dst); 38
38 return 0; 39 err = __ip_route_output_key(&init_net, &rt, &fl);
39 } 40 dst = &rt->u.dst;
40 return -EHOSTUNREACH; 41 if (err)
42 dst = ERR_PTR(err);
43 return dst;
44}
45
46static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
47{
48 struct dst_entry *dst;
49 struct rtable *rt;
50
51 dst = xfrm4_dst_lookup(0, NULL, daddr);
52 if (IS_ERR(dst))
53 return -EHOSTUNREACH;
54
55 rt = (struct rtable *)dst;
56 saddr->a4 = rt->rt_src;
57 dst_release(dst);
58 return 0;
41} 59}
42 60
43static struct dst_entry * 61static struct dst_entry *
@@ -61,142 +79,49 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
61 return dst; 79 return dst;
62} 80}
63 81
64/* Allocate chain of dst_entry's, attach known xfrm's, calculate 82static int xfrm4_get_tos(struct flowi *fl)
65 * all the metrics... Shortly, bundle a bundle. 83{
66 */ 84 return fl->fl4_tos;
85}
67 86
68static int 87static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
69__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, 88 int nfheader_len)
70 struct flowi *fl, struct dst_entry **dst_p)
71{ 89{
72 struct dst_entry *dst, *dst_prev; 90 return 0;
73 struct rtable *rt0 = (struct rtable*)(*dst_p); 91}
74 struct rtable *rt = rt0;
75 struct flowi fl_tunnel = {
76 .nl_u = {
77 .ip4_u = {
78 .saddr = fl->fl4_src,
79 .daddr = fl->fl4_dst,
80 .tos = fl->fl4_tos
81 }
82 }
83 };
84 int i;
85 int err;
86 int header_len = 0;
87 int trailer_len = 0;
88 92
89 dst = dst_prev = NULL; 93static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
90 dst_hold(&rt->u.dst); 94{
95 struct rtable *rt = (struct rtable *)xdst->route;
91 96
92 for (i = 0; i < nx; i++) { 97 xdst->u.rt.fl = rt->fl;
93 struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
94 struct xfrm_dst *xdst;
95 98
96 if (unlikely(dst1 == NULL)) { 99 xdst->u.dst.dev = dev;
97 err = -ENOBUFS; 100 dev_hold(dev);
98 dst_release(&rt->u.dst);
99 goto error;
100 }
101 101
102 if (!dst) 102 xdst->u.rt.idev = in_dev_get(dev);
103 dst = dst1; 103 if (!xdst->u.rt.idev)
104 else { 104 return -ENODEV;
105 dst_prev->child = dst1;
106 dst1->flags |= DST_NOHASH;
107 dst_clone(dst1);
108 }
109 105
110 xdst = (struct xfrm_dst *)dst1; 106 xdst->u.rt.peer = rt->peer;
111 xdst->route = &rt->u.dst; 107 if (rt->peer)
112 xdst->genid = xfrm[i]->genid; 108 atomic_inc(&rt->peer->refcnt);
113
114 dst1->next = dst_prev;
115 dst_prev = dst1;
116
117 header_len += xfrm[i]->props.header_len;
118 trailer_len += xfrm[i]->props.trailer_len;
119
120 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
121 unsigned short encap_family = xfrm[i]->props.family;
122 switch (encap_family) {
123 case AF_INET:
124 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
125 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
126 break;
127#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
128 case AF_INET6:
129 ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
130 ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
131 break;
132#endif
133 default:
134 BUG_ON(1);
135 }
136 err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
137 &fl_tunnel, encap_family);
138 if (err)
139 goto error;
140 } else
141 dst_hold(&rt->u.dst);
142 }
143 109
144 dst_prev->child = &rt->u.dst; 110 /* Sheit... I remember I did this right. Apparently,
145 dst->path = &rt->u.dst; 111 * it was magically lost, so this code needs audit */
146 112 xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
147 *dst_p = dst; 113 RTCF_LOCAL);
148 dst = dst_prev; 114 xdst->u.rt.rt_type = rt->rt_type;
149 115 xdst->u.rt.rt_src = rt->rt_src;
150 dst_prev = *dst_p; 116 xdst->u.rt.rt_dst = rt->rt_dst;
151 i = 0; 117 xdst->u.rt.rt_gateway = rt->rt_gateway;
152 for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { 118 xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
153 struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
154 x->u.rt.fl = *fl;
155
156 dst_prev->xfrm = xfrm[i++];
157 dst_prev->dev = rt->u.dst.dev;
158 if (rt->u.dst.dev)
159 dev_hold(rt->u.dst.dev);
160 dst_prev->obsolete = -1;
161 dst_prev->flags |= DST_HOST;
162 dst_prev->lastuse = jiffies;
163 dst_prev->header_len = header_len;
164 dst_prev->nfheader_len = 0;
165 dst_prev->trailer_len = trailer_len;
166 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
167
168 /* Copy neighbout for reachability confirmation */
169 dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
170 dst_prev->input = rt->u.dst.input;
171 dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
172 if (rt0->peer)
173 atomic_inc(&rt0->peer->refcnt);
174 x->u.rt.peer = rt0->peer;
175 /* Sheit... I remember I did this right. Apparently,
176 * it was magically lost, so this code needs audit */
177 x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
178 x->u.rt.rt_type = rt0->rt_type;
179 x->u.rt.rt_src = rt0->rt_src;
180 x->u.rt.rt_dst = rt0->rt_dst;
181 x->u.rt.rt_gateway = rt0->rt_gateway;
182 x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
183 x->u.rt.idev = rt0->idev;
184 in_dev_hold(rt0->idev);
185 header_len -= x->u.dst.xfrm->props.header_len;
186 trailer_len -= x->u.dst.xfrm->props.trailer_len;
187 }
188 119
189 xfrm_init_pmtu(dst);
190 return 0; 120 return 0;
191
192error:
193 if (dst)
194 dst_free(dst);
195 return err;
196} 121}
197 122
198static void 123static void
199_decode_session4(struct sk_buff *skb, struct flowi *fl) 124_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
200{ 125{
201 struct iphdr *iph = ip_hdr(skb); 126 struct iphdr *iph = ip_hdr(skb);
202 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 127 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
@@ -212,8 +137,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
212 if (pskb_may_pull(skb, xprth + 4 - skb->data)) { 137 if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
213 __be16 *ports = (__be16 *)xprth; 138 __be16 *ports = (__be16 *)xprth;
214 139
215 fl->fl_ip_sport = ports[0]; 140 fl->fl_ip_sport = ports[!!reverse];
216 fl->fl_ip_dport = ports[1]; 141 fl->fl_ip_dport = ports[!reverse];
217 } 142 }
218 break; 143 break;
219 144
@@ -255,12 +180,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
255 } 180 }
256 } 181 }
257 fl->proto = iph->protocol; 182 fl->proto = iph->protocol;
258 fl->fl4_dst = iph->daddr; 183 fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
259 fl->fl4_src = iph->saddr; 184 fl->fl4_src = reverse ? iph->daddr : iph->saddr;
260 fl->fl4_tos = iph->tos; 185 fl->fl4_tos = iph->tos;
261} 186}
262 187
263static inline int xfrm4_garbage_collect(void) 188static inline int xfrm4_garbage_collect(struct dst_ops *ops)
264{ 189{
265 xfrm4_policy_afinfo.garbage_collect(); 190 xfrm4_policy_afinfo.garbage_collect();
266 return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); 191 return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
@@ -295,7 +220,8 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 220
296 xdst = (struct xfrm_dst *)dst; 221 xdst = (struct xfrm_dst *)dst;
297 if (xdst->u.rt.idev->dev == dev) { 222 if (xdst->u.rt.idev->dev == dev) {
298 struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); 223 struct in_device *loopback_idev =
224 in_dev_get(dev->nd_net->loopback_dev);
299 BUG_ON(!loopback_idev); 225 BUG_ON(!loopback_idev);
300 226
301 do { 227 do {
@@ -318,8 +244,10 @@ static struct dst_ops xfrm4_dst_ops = {
318 .update_pmtu = xfrm4_update_pmtu, 244 .update_pmtu = xfrm4_update_pmtu,
319 .destroy = xfrm4_dst_destroy, 245 .destroy = xfrm4_dst_destroy,
320 .ifdown = xfrm4_dst_ifdown, 246 .ifdown = xfrm4_dst_ifdown,
247 .local_out = __ip_local_out,
321 .gc_thresh = 1024, 248 .gc_thresh = 1024,
322 .entry_size = sizeof(struct xfrm_dst), 249 .entry_size = sizeof(struct xfrm_dst),
250 .entries = ATOMIC_INIT(0),
323}; 251};
324 252
325static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { 253static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
@@ -328,8 +256,10 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
328 .dst_lookup = xfrm4_dst_lookup, 256 .dst_lookup = xfrm4_dst_lookup,
329 .get_saddr = xfrm4_get_saddr, 257 .get_saddr = xfrm4_get_saddr,
330 .find_bundle = __xfrm4_find_bundle, 258 .find_bundle = __xfrm4_find_bundle,
331 .bundle_create = __xfrm4_bundle_create,
332 .decode_session = _decode_session4, 259 .decode_session = _decode_session4,
260 .get_tos = xfrm4_get_tos,
261 .init_path = xfrm4_init_path,
262 .fill_dst = xfrm4_fill_dst,
333}; 263};
334 264
335static void __init xfrm4_policy_init(void) 265static void __init xfrm4_policy_init(void)
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 13d54a1c3337..fdeebe68a379 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -11,6 +11,7 @@
11#include <net/xfrm.h> 11#include <net/xfrm.h>
12#include <linux/pfkeyv2.h> 12#include <linux/pfkeyv2.h>
13#include <linux/ipsec.h> 13#include <linux/ipsec.h>
14#include <linux/netfilter_ipv4.h>
14 15
15static struct xfrm_state_afinfo xfrm4_state_afinfo; 16static struct xfrm_state_afinfo xfrm4_state_afinfo;
16 17
@@ -47,12 +48,31 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
47 x->props.family = AF_INET; 48 x->props.family = AF_INET;
48} 49}
49 50
51int xfrm4_extract_header(struct sk_buff *skb)
52{
53 struct iphdr *iph = ip_hdr(skb);
54
55 XFRM_MODE_SKB_CB(skb)->id = iph->id;
56 XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
57 XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
58 XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
59 memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
60 sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
61
62 return 0;
63}
64
50static struct xfrm_state_afinfo xfrm4_state_afinfo = { 65static struct xfrm_state_afinfo xfrm4_state_afinfo = {
51 .family = AF_INET, 66 .family = AF_INET,
67 .proto = IPPROTO_IPIP,
68 .eth_proto = htons(ETH_P_IP),
52 .owner = THIS_MODULE, 69 .owner = THIS_MODULE,
53 .init_flags = xfrm4_init_flags, 70 .init_flags = xfrm4_init_flags,
54 .init_tempsel = __xfrm4_init_tempsel, 71 .init_tempsel = __xfrm4_init_tempsel,
55 .output = xfrm4_output, 72 .output = xfrm4_output,
73 .extract_input = xfrm4_extract_input,
74 .extract_output = xfrm4_extract_output,
75 .transport_finish = xfrm4_transport_finish,
56}; 76};
57 77
58void __init xfrm4_state_init(void) 78void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 326845195620..41f5982d2087 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x)
38{ 38{
39} 39}
40 40
41static struct xfrm_type ipip_type = { 41static const struct xfrm_type ipip_type = {
42 .description = "IPIP", 42 .description = "IPIP",
43 .owner = THIS_MODULE, 43 .owner = THIS_MODULE,
44 .proto = IPPROTO_IPIP, 44 .proto = IPPROTO_IPIP,
@@ -50,7 +50,7 @@ static struct xfrm_type ipip_type = {
50 50
51static int xfrm_tunnel_rcv(struct sk_buff *skb) 51static int xfrm_tunnel_rcv(struct sk_buff *skb)
52{ 52{
53 return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); 53 return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
54} 54}
55 55
56static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) 56static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index eb0b8085949b..3ffb0323668c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -85,6 +85,7 @@ config INET6_ESP
85 depends on IPV6 85 depends on IPV6
86 select XFRM 86 select XFRM
87 select CRYPTO 87 select CRYPTO
88 select CRYPTO_AEAD
88 select CRYPTO_HMAC 89 select CRYPTO_HMAC
89 select CRYPTO_MD5 90 select CRYPTO_MD5
90 select CRYPTO_CBC 91 select CRYPTO_CBC
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 87c23a73d284..24f3aa0f2a35 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,11 +5,12 @@
5obj-$(CONFIG_IPV6) += ipv6.o 5obj-$(CONFIG_IPV6) += ipv6.o
6 6
7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ 7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
8 addrlabel.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ 9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
9 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 10 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o \ 11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
11 ip6_flowlabel.o inet6_connection_sock.o
12 12
13ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 14ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 15 xfrm6_output.o
15ipv6-$(CONFIG_NETFILTER) += netfilter.o 16ipv6-$(CONFIG_NETFILTER) += netfilter.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 348bd8d06112..e40213db9e4c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -101,8 +101,16 @@
101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) 101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
102 102
103#ifdef CONFIG_SYSCTL 103#ifdef CONFIG_SYSCTL
104static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); 104static void addrconf_sysctl_register(struct inet6_dev *idev);
105static void addrconf_sysctl_unregister(struct ipv6_devconf *p); 105static void addrconf_sysctl_unregister(struct inet6_dev *idev);
106#else
107static inline void addrconf_sysctl_register(struct inet6_dev *idev)
108{
109}
110
111static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
112{
113}
106#endif 114#endif
107 115
108#ifdef CONFIG_IPV6_PRIVACY 116#ifdef CONFIG_IPV6_PRIVACY
@@ -141,7 +149,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
141 149
142static void inet6_prefix_notify(int event, struct inet6_dev *idev, 150static void inet6_prefix_notify(int event, struct inet6_dev *idev,
143 struct prefix_info *pinfo); 151 struct prefix_info *pinfo);
144static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); 152static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
153 struct net_device *dev);
145 154
146static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 155static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
147 156
@@ -256,16 +265,13 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
256static int snmp6_alloc_dev(struct inet6_dev *idev) 265static int snmp6_alloc_dev(struct inet6_dev *idev)
257{ 266{
258 if (snmp_mib_init((void **)idev->stats.ipv6, 267 if (snmp_mib_init((void **)idev->stats.ipv6,
259 sizeof(struct ipstats_mib), 268 sizeof(struct ipstats_mib)) < 0)
260 __alignof__(struct ipstats_mib)) < 0)
261 goto err_ip; 269 goto err_ip;
262 if (snmp_mib_init((void **)idev->stats.icmpv6, 270 if (snmp_mib_init((void **)idev->stats.icmpv6,
263 sizeof(struct icmpv6_mib), 271 sizeof(struct icmpv6_mib)) < 0)
264 __alignof__(struct icmpv6_mib)) < 0)
265 goto err_icmp; 272 goto err_icmp;
266 if (snmp_mib_init((void **)idev->stats.icmpv6msg, 273 if (snmp_mib_init((void **)idev->stats.icmpv6msg,
267 sizeof(struct icmpv6msg_mib), 274 sizeof(struct icmpv6msg_mib)) < 0)
268 __alignof__(struct icmpv6msg_mib)) < 0)
269 goto err_icmpmsg; 275 goto err_icmpmsg;
270 276
271 return 0; 277 return 0;
@@ -329,7 +335,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
329 335
330 rwlock_init(&ndev->lock); 336 rwlock_init(&ndev->lock);
331 ndev->dev = dev; 337 ndev->dev = dev;
332 memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); 338 memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf));
333 ndev->cnf.mtu6 = dev->mtu; 339 ndev->cnf.mtu6 = dev->mtu;
334 ndev->cnf.sysctl = NULL; 340 ndev->cnf.sysctl = NULL;
335 ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); 341 ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -366,9 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
366 in6_dev_hold(ndev); 372 in6_dev_hold(ndev);
367 373
368#ifdef CONFIG_IPV6_PRIVACY 374#ifdef CONFIG_IPV6_PRIVACY
369 init_timer(&ndev->regen_timer); 375 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
370 ndev->regen_timer.function = ipv6_regen_rndid;
371 ndev->regen_timer.data = (unsigned long) ndev;
372 if ((dev->flags&IFF_LOOPBACK) || 376 if ((dev->flags&IFF_LOOPBACK) ||
373 dev->type == ARPHRD_TUNNEL || 377 dev->type == ARPHRD_TUNNEL ||
374#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 378#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
@@ -379,6 +383,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
379 "%s: Disabled Privacy Extensions\n", 383 "%s: Disabled Privacy Extensions\n",
380 dev->name); 384 dev->name);
381 ndev->cnf.use_tempaddr = -1; 385 ndev->cnf.use_tempaddr = -1;
386
387 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
388 printk(KERN_INFO
389 "%s: Disabled Multicast RS\n",
390 dev->name);
391 ndev->cnf.rtr_solicits = 0;
392 }
382 } else { 393 } else {
383 in6_dev_hold(ndev); 394 in6_dev_hold(ndev);
384 ipv6_regen_rndid((unsigned long) ndev); 395 ipv6_regen_rndid((unsigned long) ndev);
@@ -390,13 +401,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
390 401
391 ipv6_mc_init_dev(ndev); 402 ipv6_mc_init_dev(ndev);
392 ndev->tstamp = jiffies; 403 ndev->tstamp = jiffies;
393#ifdef CONFIG_SYSCTL 404 addrconf_sysctl_register(ndev);
394 neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
395 NET_IPV6_NEIGH, "ipv6",
396 &ndisc_ifinfo_sysctl_change,
397 NULL);
398 addrconf_sysctl_register(ndev, &ndev->cnf);
399#endif
400 /* protected by rtnl_lock */ 405 /* protected by rtnl_lock */
401 rcu_assign_pointer(dev->ip6_ptr, ndev); 406 rcu_assign_pointer(dev->ip6_ptr, ndev);
402 407
@@ -452,18 +457,18 @@ static void dev_forward_change(struct inet6_dev *idev)
452} 457}
453 458
454 459
455static void addrconf_forward_change(void) 460static void addrconf_forward_change(struct net *net, __s32 newf)
456{ 461{
457 struct net_device *dev; 462 struct net_device *dev;
458 struct inet6_dev *idev; 463 struct inet6_dev *idev;
459 464
460 read_lock(&dev_base_lock); 465 read_lock(&dev_base_lock);
461 for_each_netdev(&init_net, dev) { 466 for_each_netdev(net, dev) {
462 rcu_read_lock(); 467 rcu_read_lock();
463 idev = __in6_dev_get(dev); 468 idev = __in6_dev_get(dev);
464 if (idev) { 469 if (idev) {
465 int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); 470 int changed = (!idev->cnf.forwarding) ^ (!newf);
466 idev->cnf.forwarding = ipv6_devconf.forwarding; 471 idev->cnf.forwarding = newf;
467 if (changed) 472 if (changed)
468 dev_forward_change(idev); 473 dev_forward_change(idev);
469 } 474 }
@@ -471,6 +476,25 @@ static void addrconf_forward_change(void)
471 } 476 }
472 read_unlock(&dev_base_lock); 477 read_unlock(&dev_base_lock);
473} 478}
479
480static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
481{
482 struct net *net;
483
484 net = (struct net *)table->extra2;
485 if (p == &net->ipv6.devconf_dflt->forwarding)
486 return;
487
488 if (p == &net->ipv6.devconf_all->forwarding) {
489 __s32 newf = net->ipv6.devconf_all->forwarding;
490 net->ipv6.devconf_dflt->forwarding = newf;
491 addrconf_forward_change(net, newf);
492 } else if ((!*p) ^ (!old))
493 dev_forward_change((struct inet6_dev *)table->extra1);
494
495 if (*p)
496 rt6_purge_dflt_routers();
497}
474#endif 498#endif
475 499
476/* Nobody refers to this ifaddr, destroy it */ 500/* Nobody refers to this ifaddr, destroy it */
@@ -537,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
537 write_lock(&addrconf_hash_lock); 561 write_lock(&addrconf_hash_lock);
538 562
539 /* Ignore adding duplicate addresses on an interface */ 563 /* Ignore adding duplicate addresses on an interface */
540 if (ipv6_chk_same_addr(addr, idev->dev)) { 564 if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) {
541 ADBG(("ipv6_add_addr: already assigned\n")); 565 ADBG(("ipv6_add_addr: already assigned\n"));
542 err = -EEXIST; 566 err = -EEXIST;
543 goto out; 567 goto out;
@@ -876,35 +900,6 @@ static inline int ipv6_saddr_preferred(int type)
876 return 0; 900 return 0;
877} 901}
878 902
879/* static matching label */
880static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
881{
882 /*
883 * prefix (longest match) label
884 * -----------------------------
885 * ::1/128 0
886 * ::/0 1
887 * 2002::/16 2
888 * ::/96 3
889 * ::ffff:0:0/96 4
890 * fc00::/7 5
891 * 2001::/32 6
892 */
893 if (type & IPV6_ADDR_LOOPBACK)
894 return 0;
895 else if (type & IPV6_ADDR_COMPATv4)
896 return 3;
897 else if (type & IPV6_ADDR_MAPPED)
898 return 4;
899 else if (addr->s6_addr32[0] == htonl(0x20010000))
900 return 6;
901 else if (addr->s6_addr16[0] == htons(0x2002))
902 return 2;
903 else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
904 return 5;
905 return 1;
906}
907
908int ipv6_dev_get_saddr(struct net_device *daddr_dev, 903int ipv6_dev_get_saddr(struct net_device *daddr_dev,
909 struct in6_addr *daddr, struct in6_addr *saddr) 904 struct in6_addr *daddr, struct in6_addr *saddr)
910{ 905{
@@ -912,7 +907,8 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
912 struct inet6_ifaddr *ifa_result = NULL; 907 struct inet6_ifaddr *ifa_result = NULL;
913 int daddr_type = __ipv6_addr_type(daddr); 908 int daddr_type = __ipv6_addr_type(daddr);
914 int daddr_scope = __ipv6_addr_src_scope(daddr_type); 909 int daddr_scope = __ipv6_addr_src_scope(daddr_type);
915 u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); 910 int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0;
911 u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex);
916 struct net_device *dev; 912 struct net_device *dev;
917 913
918 memset(&hiscore, 0, sizeof(hiscore)); 914 memset(&hiscore, 0, sizeof(hiscore));
@@ -967,7 +963,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
967 if (unlikely(score.addr_type == IPV6_ADDR_ANY || 963 if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
968 score.addr_type & IPV6_ADDR_MULTICAST)) { 964 score.addr_type & IPV6_ADDR_MULTICAST)) {
969 LIMIT_NETDEBUG(KERN_DEBUG 965 LIMIT_NETDEBUG(KERN_DEBUG
970 "ADDRCONF: unspecified / multicast address" 966 "ADDRCONF: unspecified / multicast address "
971 "assigned as unicast address on %s", 967 "assigned as unicast address on %s",
972 dev->name); 968 dev->name);
973 continue; 969 continue;
@@ -1085,11 +1081,15 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
1085 1081
1086 /* Rule 6: Prefer matching label */ 1082 /* Rule 6: Prefer matching label */
1087 if (hiscore.rule < 6) { 1083 if (hiscore.rule < 6) {
1088 if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) 1084 if (ipv6_addr_label(&ifa_result->addr,
1085 hiscore.addr_type,
1086 ifa_result->idev->dev->ifindex) == daddr_label)
1089 hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; 1087 hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
1090 hiscore.rule++; 1088 hiscore.rule++;
1091 } 1089 }
1092 if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { 1090 if (ipv6_addr_label(&ifa->addr,
1091 score.addr_type,
1092 ifa->idev->dev->ifindex) == daddr_label) {
1093 score.attrs |= IPV6_SADDR_SCORE_LABEL; 1093 score.attrs |= IPV6_SADDR_SCORE_LABEL;
1094 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { 1094 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
1095 score.rule = 6; 1095 score.rule = 6;
@@ -1207,13 +1207,16 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1207 return cnt; 1207 return cnt;
1208} 1208}
1209 1209
1210int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) 1210int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
1211 struct net_device *dev, int strict)
1211{ 1212{
1212 struct inet6_ifaddr * ifp; 1213 struct inet6_ifaddr * ifp;
1213 u8 hash = ipv6_addr_hash(addr); 1214 u8 hash = ipv6_addr_hash(addr);
1214 1215
1215 read_lock_bh(&addrconf_hash_lock); 1216 read_lock_bh(&addrconf_hash_lock);
1216 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1217 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1218 if (ifp->idev->dev->nd_net != net)
1219 continue;
1217 if (ipv6_addr_equal(&ifp->addr, addr) && 1220 if (ipv6_addr_equal(&ifp->addr, addr) &&
1218 !(ifp->flags&IFA_F_TENTATIVE)) { 1221 !(ifp->flags&IFA_F_TENTATIVE)) {
1219 if (dev == NULL || ifp->idev->dev == dev || 1222 if (dev == NULL || ifp->idev->dev == dev ||
@@ -1224,16 +1227,18 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
1224 read_unlock_bh(&addrconf_hash_lock); 1227 read_unlock_bh(&addrconf_hash_lock);
1225 return ifp != NULL; 1228 return ifp != NULL;
1226} 1229}
1227
1228EXPORT_SYMBOL(ipv6_chk_addr); 1230EXPORT_SYMBOL(ipv6_chk_addr);
1229 1231
1230static 1232static
1231int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) 1233int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1234 struct net_device *dev)
1232{ 1235{
1233 struct inet6_ifaddr * ifp; 1236 struct inet6_ifaddr * ifp;
1234 u8 hash = ipv6_addr_hash(addr); 1237 u8 hash = ipv6_addr_hash(addr);
1235 1238
1236 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1239 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1240 if (ifp->idev->dev->nd_net != net)
1241 continue;
1237 if (ipv6_addr_equal(&ifp->addr, addr)) { 1242 if (ipv6_addr_equal(&ifp->addr, addr)) {
1238 if (dev == NULL || ifp->idev->dev == dev) 1243 if (dev == NULL || ifp->idev->dev == dev)
1239 break; 1244 break;
@@ -1242,13 +1247,16 @@ int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
1242 return ifp != NULL; 1247 return ifp != NULL;
1243} 1248}
1244 1249
1245struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) 1250struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr,
1251 struct net_device *dev, int strict)
1246{ 1252{
1247 struct inet6_ifaddr * ifp; 1253 struct inet6_ifaddr * ifp;
1248 u8 hash = ipv6_addr_hash(addr); 1254 u8 hash = ipv6_addr_hash(addr);
1249 1255
1250 read_lock_bh(&addrconf_hash_lock); 1256 read_lock_bh(&addrconf_hash_lock);
1251 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1257 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1258 if (ifp->idev->dev->nd_net != net)
1259 continue;
1252 if (ipv6_addr_equal(&ifp->addr, addr)) { 1260 if (ipv6_addr_equal(&ifp->addr, addr)) {
1253 if (dev == NULL || ifp->idev->dev == dev || 1261 if (dev == NULL || ifp->idev->dev == dev ||
1254 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { 1262 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -1435,6 +1443,9 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1435 return addrconf_ifid_arcnet(eui, dev); 1443 return addrconf_ifid_arcnet(eui, dev);
1436 case ARPHRD_INFINIBAND: 1444 case ARPHRD_INFINIBAND:
1437 return addrconf_ifid_infiniband(eui, dev); 1445 return addrconf_ifid_infiniband(eui, dev);
1446 case ARPHRD_SIT:
1447 if (dev->priv_flags & IFF_ISATAP)
1448 return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr);
1438 } 1449 }
1439 return -1; 1450 return -1;
1440} 1451}
@@ -1470,7 +1481,7 @@ regen:
1470 * 1481 *
1471 * - Reserved subnet anycast (RFC 2526) 1482 * - Reserved subnet anycast (RFC 2526)
1472 * 11111101 11....11 1xxxxxxx 1483 * 11111101 11....11 1xxxxxxx
1473 * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 1484 * - ISATAP (RFC4214) 6.1
1474 * 00-00-5E-FE-xx-xx-xx-xx 1485 * 00-00-5E-FE-xx-xx-xx-xx
1475 * - value 0 1486 * - value 0
1476 * - XXX: already assigned to an address on the device 1487 * - XXX: already assigned to an address on the device
@@ -1731,7 +1742,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1731 1742
1732ok: 1743ok:
1733 1744
1734 ifp = ipv6_get_ifaddr(&addr, dev, 1); 1745 ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1);
1735 1746
1736 if (ifp == NULL && valid_lft) { 1747 if (ifp == NULL && valid_lft) {
1737 int max_addresses = in6_dev->cnf.max_addresses; 1748 int max_addresses = in6_dev->cnf.max_addresses;
@@ -1889,7 +1900,7 @@ int addrconf_set_dstaddr(void __user *arg)
1889 p.iph.ihl = 5; 1900 p.iph.ihl = 5;
1890 p.iph.protocol = IPPROTO_IPV6; 1901 p.iph.protocol = IPPROTO_IPV6;
1891 p.iph.ttl = 64; 1902 p.iph.ttl = 64;
1892 ifr.ifr_ifru.ifru_data = (void __user *)&p; 1903 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
1893 1904
1894 oldfs = get_fs(); set_fs(KERNEL_DS); 1905 oldfs = get_fs(); set_fs(KERNEL_DS);
1895 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 1906 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -2201,6 +2212,16 @@ static void addrconf_sit_config(struct net_device *dev)
2201 return; 2212 return;
2202 } 2213 }
2203 2214
2215 if (dev->priv_flags & IFF_ISATAP) {
2216 struct in6_addr addr;
2217
2218 ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
2219 addrconf_prefix_route(&addr, 64, dev, 0, 0);
2220 if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
2221 addrconf_add_linklocal(idev, &addr);
2222 return;
2223 }
2224
2204 sit_add_v4_addrs(idev); 2225 sit_add_v4_addrs(idev);
2205 2226
2206 if (dev->flags&IFF_POINTOPOINT) { 2227 if (dev->flags&IFF_POINTOPOINT) {
@@ -2293,6 +2314,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2293 break; 2314 break;
2294 } 2315 }
2295 2316
2317 if (!idev && dev->mtu >= IPV6_MIN_MTU)
2318 idev = ipv6_add_dev(dev);
2319
2296 if (idev) 2320 if (idev)
2297 idev->if_flags |= IF_READY; 2321 idev->if_flags |= IF_READY;
2298 } else { 2322 } else {
@@ -2357,12 +2381,18 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2357 break; 2381 break;
2358 2382
2359 case NETDEV_CHANGEMTU: 2383 case NETDEV_CHANGEMTU:
2360 if ( idev && dev->mtu >= IPV6_MIN_MTU) { 2384 if (idev && dev->mtu >= IPV6_MIN_MTU) {
2361 rt6_mtu_change(dev, dev->mtu); 2385 rt6_mtu_change(dev, dev->mtu);
2362 idev->cnf.mtu6 = dev->mtu; 2386 idev->cnf.mtu6 = dev->mtu;
2363 break; 2387 break;
2364 } 2388 }
2365 2389
2390 if (!idev && dev->mtu >= IPV6_MIN_MTU) {
2391 idev = ipv6_add_dev(dev);
2392 if (idev)
2393 break;
2394 }
2395
2366 /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ 2396 /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
2367 2397
2368 case NETDEV_DOWN: 2398 case NETDEV_DOWN:
@@ -2376,15 +2406,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2376 case NETDEV_CHANGENAME: 2406 case NETDEV_CHANGENAME:
2377 if (idev) { 2407 if (idev) {
2378 snmp6_unregister_dev(idev); 2408 snmp6_unregister_dev(idev);
2379#ifdef CONFIG_SYSCTL 2409 addrconf_sysctl_unregister(idev);
2380 addrconf_sysctl_unregister(&idev->cnf); 2410 addrconf_sysctl_register(idev);
2381 neigh_sysctl_unregister(idev->nd_parms);
2382 neigh_sysctl_register(dev, idev->nd_parms,
2383 NET_IPV6, NET_IPV6_NEIGH, "ipv6",
2384 &ndisc_ifinfo_sysctl_change,
2385 NULL);
2386 addrconf_sysctl_register(idev, &idev->cnf);
2387#endif
2388 err = snmp6_register_dev(idev); 2411 err = snmp6_register_dev(idev);
2389 if (err) 2412 if (err)
2390 return notifier_from_errno(err); 2413 return notifier_from_errno(err);
@@ -2508,10 +2531,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2508 /* Shot the device (if unregistered) */ 2531 /* Shot the device (if unregistered) */
2509 2532
2510 if (how == 1) { 2533 if (how == 1) {
2511#ifdef CONFIG_SYSCTL 2534 addrconf_sysctl_unregister(idev);
2512 addrconf_sysctl_unregister(&idev->cnf);
2513 neigh_sysctl_unregister(idev->nd_parms);
2514#endif
2515 neigh_parms_release(&nd_tbl, idev->nd_parms); 2535 neigh_parms_release(&nd_tbl, idev->nd_parms);
2516 neigh_ifdown(&nd_tbl, dev); 2536 neigh_ifdown(&nd_tbl, dev);
2517 in6_dev_put(idev); 2537 in6_dev_put(idev);
@@ -2725,6 +2745,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
2725 2745
2726#ifdef CONFIG_PROC_FS 2746#ifdef CONFIG_PROC_FS
2727struct if6_iter_state { 2747struct if6_iter_state {
2748 struct seq_net_private p;
2728 int bucket; 2749 int bucket;
2729}; 2750};
2730 2751
@@ -2732,9 +2753,13 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2732{ 2753{
2733 struct inet6_ifaddr *ifa = NULL; 2754 struct inet6_ifaddr *ifa = NULL;
2734 struct if6_iter_state *state = seq->private; 2755 struct if6_iter_state *state = seq->private;
2756 struct net *net = state->p.net;
2735 2757
2736 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 2758 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
2737 ifa = inet6_addr_lst[state->bucket]; 2759 ifa = inet6_addr_lst[state->bucket];
2760
2761 while (ifa && ifa->idev->dev->nd_net != net)
2762 ifa = ifa->lst_next;
2738 if (ifa) 2763 if (ifa)
2739 break; 2764 break;
2740 } 2765 }
@@ -2744,13 +2769,22 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2744static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) 2769static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
2745{ 2770{
2746 struct if6_iter_state *state = seq->private; 2771 struct if6_iter_state *state = seq->private;
2772 struct net *net = state->p.net;
2747 2773
2748 ifa = ifa->lst_next; 2774 ifa = ifa->lst_next;
2749try_again: 2775try_again:
2776 if (ifa) {
2777 if (ifa->idev->dev->nd_net != net) {
2778 ifa = ifa->lst_next;
2779 goto try_again;
2780 }
2781 }
2782
2750 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { 2783 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
2751 ifa = inet6_addr_lst[state->bucket]; 2784 ifa = inet6_addr_lst[state->bucket];
2752 goto try_again; 2785 goto try_again;
2753 } 2786 }
2787
2754 return ifa; 2788 return ifa;
2755} 2789}
2756 2790
@@ -2765,6 +2799,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
2765} 2799}
2766 2800
2767static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 2801static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
2802 __acquires(addrconf_hash_lock)
2768{ 2803{
2769 read_lock_bh(&addrconf_hash_lock); 2804 read_lock_bh(&addrconf_hash_lock);
2770 return if6_get_idx(seq, *pos); 2805 return if6_get_idx(seq, *pos);
@@ -2780,6 +2815,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2780} 2815}
2781 2816
2782static void if6_seq_stop(struct seq_file *seq, void *v) 2817static void if6_seq_stop(struct seq_file *seq, void *v)
2818 __releases(addrconf_hash_lock)
2783{ 2819{
2784 read_unlock_bh(&addrconf_hash_lock); 2820 read_unlock_bh(&addrconf_hash_lock);
2785} 2821}
@@ -2807,8 +2843,8 @@ static const struct seq_operations if6_seq_ops = {
2807 2843
2808static int if6_seq_open(struct inode *inode, struct file *file) 2844static int if6_seq_open(struct inode *inode, struct file *file)
2809{ 2845{
2810 return seq_open_private(file, &if6_seq_ops, 2846 return seq_open_net(inode, file, &if6_seq_ops,
2811 sizeof(struct if6_iter_state)); 2847 sizeof(struct if6_iter_state));
2812} 2848}
2813 2849
2814static const struct file_operations if6_fops = { 2850static const struct file_operations if6_fops = {
@@ -2816,31 +2852,48 @@ static const struct file_operations if6_fops = {
2816 .open = if6_seq_open, 2852 .open = if6_seq_open,
2817 .read = seq_read, 2853 .read = seq_read,
2818 .llseek = seq_lseek, 2854 .llseek = seq_lseek,
2819 .release = seq_release_private, 2855 .release = seq_release_net,
2820}; 2856};
2821 2857
2822int __init if6_proc_init(void) 2858static int if6_proc_net_init(struct net *net)
2823{ 2859{
2824 if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) 2860 if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
2825 return -ENOMEM; 2861 return -ENOMEM;
2826 return 0; 2862 return 0;
2827} 2863}
2828 2864
2865static void if6_proc_net_exit(struct net *net)
2866{
2867 proc_net_remove(net, "if_inet6");
2868}
2869
2870static struct pernet_operations if6_proc_net_ops = {
2871 .init = if6_proc_net_init,
2872 .exit = if6_proc_net_exit,
2873};
2874
2875int __init if6_proc_init(void)
2876{
2877 return register_pernet_subsys(&if6_proc_net_ops);
2878}
2879
2829void if6_proc_exit(void) 2880void if6_proc_exit(void)
2830{ 2881{
2831 proc_net_remove(&init_net, "if_inet6"); 2882 unregister_pernet_subsys(&if6_proc_net_ops);
2832} 2883}
2833#endif /* CONFIG_PROC_FS */ 2884#endif /* CONFIG_PROC_FS */
2834 2885
2835#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 2886#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
2836/* Check if address is a home address configured on any interface. */ 2887/* Check if address is a home address configured on any interface. */
2837int ipv6_chk_home_addr(struct in6_addr *addr) 2888int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
2838{ 2889{
2839 int ret = 0; 2890 int ret = 0;
2840 struct inet6_ifaddr * ifp; 2891 struct inet6_ifaddr * ifp;
2841 u8 hash = ipv6_addr_hash(addr); 2892 u8 hash = ipv6_addr_hash(addr);
2842 read_lock_bh(&addrconf_hash_lock); 2893 read_lock_bh(&addrconf_hash_lock);
2843 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { 2894 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
2895 if (ifp->idev->dev->nd_net != net)
2896 continue;
2844 if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && 2897 if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
2845 (ifp->flags & IFA_F_HOMEADDRESS)) { 2898 (ifp->flags & IFA_F_HOMEADDRESS)) {
2846 ret = 1; 2899 ret = 1;
@@ -2988,11 +3041,15 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
2988static int 3041static int
2989inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3042inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2990{ 3043{
3044 struct net *net = skb->sk->sk_net;
2991 struct ifaddrmsg *ifm; 3045 struct ifaddrmsg *ifm;
2992 struct nlattr *tb[IFA_MAX+1]; 3046 struct nlattr *tb[IFA_MAX+1];
2993 struct in6_addr *pfx; 3047 struct in6_addr *pfx;
2994 int err; 3048 int err;
2995 3049
3050 if (net != &init_net)
3051 return -EINVAL;
3052
2996 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3053 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
2997 if (err < 0) 3054 if (err < 0)
2998 return err; 3055 return err;
@@ -3045,6 +3102,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
3045static int 3102static int
3046inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3103inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3047{ 3104{
3105 struct net *net = skb->sk->sk_net;
3048 struct ifaddrmsg *ifm; 3106 struct ifaddrmsg *ifm;
3049 struct nlattr *tb[IFA_MAX+1]; 3107 struct nlattr *tb[IFA_MAX+1];
3050 struct in6_addr *pfx; 3108 struct in6_addr *pfx;
@@ -3054,6 +3112,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3054 u8 ifa_flags; 3112 u8 ifa_flags;
3055 int err; 3113 int err;
3056 3114
3115 if (net != &init_net)
3116 return -EINVAL;
3117
3057 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3118 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3058 if (err < 0) 3119 if (err < 0)
3059 return err; 3120 return err;
@@ -3081,7 +3142,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3081 /* We ignore other flags so far. */ 3142 /* We ignore other flags so far. */
3082 ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); 3143 ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
3083 3144
3084 ifa = ipv6_get_ifaddr(pfx, dev, 1); 3145 ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
3085 if (ifa == NULL) { 3146 if (ifa == NULL) {
3086 /* 3147 /*
3087 * It would be best to check for !NLM_F_CREATE here but 3148 * It would be best to check for !NLM_F_CREATE here but
@@ -3274,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3274 ifa = ifa->if_next, ip_idx++) { 3335 ifa = ifa->if_next, ip_idx++) {
3275 if (ip_idx < s_ip_idx) 3336 if (ip_idx < s_ip_idx)
3276 continue; 3337 continue;
3277 if ((err = inet6_fill_ifaddr(skb, ifa, 3338 err = inet6_fill_ifaddr(skb, ifa,
3278 NETLINK_CB(cb->skb).pid, 3339 NETLINK_CB(cb->skb).pid,
3279 cb->nlh->nlmsg_seq, RTM_NEWADDR, 3340 cb->nlh->nlmsg_seq,
3280 NLM_F_MULTI)) <= 0) 3341 RTM_NEWADDR,
3281 goto done; 3342 NLM_F_MULTI);
3282 } 3343 }
3283 break; 3344 break;
3284 case MULTICAST_ADDR: 3345 case MULTICAST_ADDR:
@@ -3287,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3287 ifmca = ifmca->next, ip_idx++) { 3348 ifmca = ifmca->next, ip_idx++) {
3288 if (ip_idx < s_ip_idx) 3349 if (ip_idx < s_ip_idx)
3289 continue; 3350 continue;
3290 if ((err = inet6_fill_ifmcaddr(skb, ifmca, 3351 err = inet6_fill_ifmcaddr(skb, ifmca,
3291 NETLINK_CB(cb->skb).pid, 3352 NETLINK_CB(cb->skb).pid,
3292 cb->nlh->nlmsg_seq, RTM_GETMULTICAST, 3353 cb->nlh->nlmsg_seq,
3293 NLM_F_MULTI)) <= 0) 3354 RTM_GETMULTICAST,
3294 goto done; 3355 NLM_F_MULTI);
3295 } 3356 }
3296 break; 3357 break;
3297 case ANYCAST_ADDR: 3358 case ANYCAST_ADDR:
@@ -3300,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3300 ifaca = ifaca->aca_next, ip_idx++) { 3361 ifaca = ifaca->aca_next, ip_idx++) {
3301 if (ip_idx < s_ip_idx) 3362 if (ip_idx < s_ip_idx)
3302 continue; 3363 continue;
3303 if ((err = inet6_fill_ifacaddr(skb, ifaca, 3364 err = inet6_fill_ifacaddr(skb, ifaca,
3304 NETLINK_CB(cb->skb).pid, 3365 NETLINK_CB(cb->skb).pid,
3305 cb->nlh->nlmsg_seq, RTM_GETANYCAST, 3366 cb->nlh->nlmsg_seq,
3306 NLM_F_MULTI)) <= 0) 3367 RTM_GETANYCAST,
3307 goto done; 3368 NLM_F_MULTI);
3308 } 3369 }
3309 break; 3370 break;
3310 default: 3371 default:
@@ -3312,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3312 } 3373 }
3313 read_unlock_bh(&idev->lock); 3374 read_unlock_bh(&idev->lock);
3314 in6_dev_put(idev); 3375 in6_dev_put(idev);
3376
3377 if (err <= 0)
3378 break;
3315cont: 3379cont:
3316 idx++; 3380 idx++;
3317 } 3381 }
3318done:
3319 if (err <= 0) {
3320 read_unlock_bh(&idev->lock);
3321 in6_dev_put(idev);
3322 }
3323 cb->args[0] = idx; 3382 cb->args[0] = idx;
3324 cb->args[1] = ip_idx; 3383 cb->args[1] = ip_idx;
3325 return skb->len; 3384 return skb->len;
@@ -3327,26 +3386,42 @@ done:
3327 3386
3328static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 3387static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
3329{ 3388{
3389 struct net *net = skb->sk->sk_net;
3330 enum addr_type_t type = UNICAST_ADDR; 3390 enum addr_type_t type = UNICAST_ADDR;
3391
3392 if (net != &init_net)
3393 return 0;
3394
3331 return inet6_dump_addr(skb, cb, type); 3395 return inet6_dump_addr(skb, cb, type);
3332} 3396}
3333 3397
3334static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) 3398static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
3335{ 3399{
3400 struct net *net = skb->sk->sk_net;
3336 enum addr_type_t type = MULTICAST_ADDR; 3401 enum addr_type_t type = MULTICAST_ADDR;
3402
3403 if (net != &init_net)
3404 return 0;
3405
3337 return inet6_dump_addr(skb, cb, type); 3406 return inet6_dump_addr(skb, cb, type);
3338} 3407}
3339 3408
3340 3409
3341static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) 3410static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
3342{ 3411{
3412 struct net *net = skb->sk->sk_net;
3343 enum addr_type_t type = ANYCAST_ADDR; 3413 enum addr_type_t type = ANYCAST_ADDR;
3414
3415 if (net != &init_net)
3416 return 0;
3417
3344 return inet6_dump_addr(skb, cb, type); 3418 return inet6_dump_addr(skb, cb, type);
3345} 3419}
3346 3420
3347static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, 3421static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3348 void *arg) 3422 void *arg)
3349{ 3423{
3424 struct net *net = in_skb->sk->sk_net;
3350 struct ifaddrmsg *ifm; 3425 struct ifaddrmsg *ifm;
3351 struct nlattr *tb[IFA_MAX+1]; 3426 struct nlattr *tb[IFA_MAX+1];
3352 struct in6_addr *addr = NULL; 3427 struct in6_addr *addr = NULL;
@@ -3355,6 +3430,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3355 struct sk_buff *skb; 3430 struct sk_buff *skb;
3356 int err; 3431 int err;
3357 3432
3433 if (net != &init_net)
3434 return -EINVAL;
3435
3358 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3436 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3359 if (err < 0) 3437 if (err < 0)
3360 goto errout; 3438 goto errout;
@@ -3369,7 +3447,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3369 if (ifm->ifa_index) 3447 if (ifm->ifa_index)
3370 dev = __dev_get_by_index(&init_net, ifm->ifa_index); 3448 dev = __dev_get_by_index(&init_net, ifm->ifa_index);
3371 3449
3372 if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { 3450 if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
3373 err = -EADDRNOTAVAIL; 3451 err = -EADDRNOTAVAIL;
3374 goto errout; 3452 goto errout;
3375 } 3453 }
@@ -3387,7 +3465,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3387 kfree_skb(skb); 3465 kfree_skb(skb);
3388 goto errout_ifa; 3466 goto errout_ifa;
3389 } 3467 }
3390 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 3468 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
3391errout_ifa: 3469errout_ifa:
3392 in6_ifa_put(ifa); 3470 in6_ifa_put(ifa);
3393errout: 3471errout:
@@ -3410,10 +3488,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
3410 kfree_skb(skb); 3488 kfree_skb(skb);
3411 goto errout; 3489 goto errout;
3412 } 3490 }
3413 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3491 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3414errout: 3492errout:
3415 if (err < 0) 3493 if (err < 0)
3416 rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); 3494 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
3417} 3495}
3418 3496
3419static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, 3497static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3572,11 +3650,15 @@ nla_put_failure:
3572 3650
3573static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 3651static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
3574{ 3652{
3653 struct net *net = skb->sk->sk_net;
3575 int idx, err; 3654 int idx, err;
3576 int s_idx = cb->args[0]; 3655 int s_idx = cb->args[0];
3577 struct net_device *dev; 3656 struct net_device *dev;
3578 struct inet6_dev *idev; 3657 struct inet6_dev *idev;
3579 3658
3659 if (net != &init_net)
3660 return 0;
3661
3580 read_lock(&dev_base_lock); 3662 read_lock(&dev_base_lock);
3581 idx = 0; 3663 idx = 0;
3582 for_each_netdev(&init_net, dev) { 3664 for_each_netdev(&init_net, dev) {
@@ -3614,10 +3696,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3614 kfree_skb(skb); 3696 kfree_skb(skb);
3615 goto errout; 3697 goto errout;
3616 } 3698 }
3617 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3699 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3618errout: 3700errout:
3619 if (err < 0) 3701 if (err < 0)
3620 rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); 3702 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
3621} 3703}
3622 3704
3623static inline size_t inet6_prefix_nlmsg_size(void) 3705static inline size_t inet6_prefix_nlmsg_size(void)
@@ -3683,10 +3765,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3683 kfree_skb(skb); 3765 kfree_skb(skb);
3684 goto errout; 3766 goto errout;
3685 } 3767 }
3686 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); 3768 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
3687errout: 3769errout:
3688 if (err < 0) 3770 if (err < 0)
3689 rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); 3771 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
3690} 3772}
3691 3773
3692static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 3774static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3737,22 +3819,8 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
3737 3819
3738 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 3820 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
3739 3821
3740 if (write && valp != &ipv6_devconf_dflt.forwarding) { 3822 if (write)
3741 if (valp != &ipv6_devconf.forwarding) { 3823 addrconf_fixup_forwarding(ctl, valp, val);
3742 if ((!*valp) ^ (!val)) {
3743 struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
3744 if (idev == NULL)
3745 return ret;
3746 dev_forward_change(idev);
3747 }
3748 } else {
3749 ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
3750 addrconf_forward_change();
3751 }
3752 if (*valp)
3753 rt6_purge_dflt_routers();
3754 }
3755
3756 return ret; 3824 return ret;
3757} 3825}
3758 3826
@@ -3763,6 +3831,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
3763 void __user *newval, size_t newlen) 3831 void __user *newval, size_t newlen)
3764{ 3832{
3765 int *valp = table->data; 3833 int *valp = table->data;
3834 int val = *valp;
3766 int new; 3835 int new;
3767 3836
3768 if (!newval || !newlen) 3837 if (!newval || !newlen)
@@ -3787,26 +3856,8 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
3787 } 3856 }
3788 } 3857 }
3789 3858
3790 if (valp != &ipv6_devconf_dflt.forwarding) { 3859 *valp = new;
3791 if (valp != &ipv6_devconf.forwarding) { 3860 addrconf_fixup_forwarding(table, valp, val);
3792 struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
3793 int changed;
3794 if (unlikely(idev == NULL))
3795 return -ENODEV;
3796 changed = (!*valp) ^ (!new);
3797 *valp = new;
3798 if (changed)
3799 dev_forward_change(idev);
3800 } else {
3801 *valp = new;
3802 addrconf_forward_change();
3803 }
3804
3805 if (*valp)
3806 rt6_purge_dflt_routers();
3807 } else
3808 *valp = new;
3809
3810 return 1; 3861 return 1;
3811} 3862}
3812 3863
@@ -3814,10 +3865,7 @@ static struct addrconf_sysctl_table
3814{ 3865{
3815 struct ctl_table_header *sysctl_header; 3866 struct ctl_table_header *sysctl_header;
3816 ctl_table addrconf_vars[__NET_IPV6_MAX]; 3867 ctl_table addrconf_vars[__NET_IPV6_MAX];
3817 ctl_table addrconf_dev[2]; 3868 char *dev_name;
3818 ctl_table addrconf_conf_dir[2];
3819 ctl_table addrconf_proto_dir[2];
3820 ctl_table addrconf_root_dir[2];
3821} addrconf_sysctl __read_mostly = { 3869} addrconf_sysctl __read_mostly = {
3822 .sysctl_header = NULL, 3870 .sysctl_header = NULL,
3823 .addrconf_vars = { 3871 .addrconf_vars = {
@@ -4038,72 +4086,33 @@ static struct addrconf_sysctl_table
4038 .ctl_name = 0, /* sentinel */ 4086 .ctl_name = 0, /* sentinel */
4039 } 4087 }
4040 }, 4088 },
4041 .addrconf_dev = {
4042 {
4043 .ctl_name = NET_PROTO_CONF_ALL,
4044 .procname = "all",
4045 .mode = 0555,
4046 .child = addrconf_sysctl.addrconf_vars,
4047 },
4048 {
4049 .ctl_name = 0, /* sentinel */
4050 }
4051 },
4052 .addrconf_conf_dir = {
4053 {
4054 .ctl_name = NET_IPV6_CONF,
4055 .procname = "conf",
4056 .mode = 0555,
4057 .child = addrconf_sysctl.addrconf_dev,
4058 },
4059 {
4060 .ctl_name = 0, /* sentinel */
4061 }
4062 },
4063 .addrconf_proto_dir = {
4064 {
4065 .ctl_name = NET_IPV6,
4066 .procname = "ipv6",
4067 .mode = 0555,
4068 .child = addrconf_sysctl.addrconf_conf_dir,
4069 },
4070 {
4071 .ctl_name = 0, /* sentinel */
4072 }
4073 },
4074 .addrconf_root_dir = {
4075 {
4076 .ctl_name = CTL_NET,
4077 .procname = "net",
4078 .mode = 0555,
4079 .child = addrconf_sysctl.addrconf_proto_dir,
4080 },
4081 {
4082 .ctl_name = 0, /* sentinel */
4083 }
4084 },
4085}; 4089};
4086 4090
4087static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) 4091static int __addrconf_sysctl_register(struct net *net, char *dev_name,
4092 int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p)
4088{ 4093{
4089 int i; 4094 int i;
4090 struct net_device *dev = idev ? idev->dev : NULL;
4091 struct addrconf_sysctl_table *t; 4095 struct addrconf_sysctl_table *t;
4092 char *dev_name = NULL; 4096
4097#define ADDRCONF_CTL_PATH_DEV 3
4098
4099 struct ctl_path addrconf_ctl_path[] = {
4100 { .procname = "net", .ctl_name = CTL_NET, },
4101 { .procname = "ipv6", .ctl_name = NET_IPV6, },
4102 { .procname = "conf", .ctl_name = NET_IPV6_CONF, },
4103 { /* to be set */ },
4104 { },
4105 };
4106
4093 4107
4094 t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); 4108 t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
4095 if (t == NULL) 4109 if (t == NULL)
4096 return; 4110 goto out;
4111
4097 for (i=0; t->addrconf_vars[i].data; i++) { 4112 for (i=0; t->addrconf_vars[i].data; i++) {
4098 t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; 4113 t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
4099 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ 4114 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
4100 } 4115 t->addrconf_vars[i].extra2 = net;
4101 if (dev) {
4102 dev_name = dev->name;
4103 t->addrconf_dev[0].ctl_name = dev->ifindex;
4104 } else {
4105 dev_name = "default";
4106 t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
4107 } 4116 }
4108 4117
4109 /* 4118 /*
@@ -4111,47 +4120,126 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
4111 * by sysctl and we wouldn't want anyone to change it under our feet 4120 * by sysctl and we wouldn't want anyone to change it under our feet
4112 * (see SIOCSIFNAME). 4121 * (see SIOCSIFNAME).
4113 */ 4122 */
4114 dev_name = kstrdup(dev_name, GFP_KERNEL); 4123 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
4115 if (!dev_name) 4124 if (!t->dev_name)
4116 goto free; 4125 goto free;
4117 4126
4118 t->addrconf_dev[0].procname = dev_name; 4127 addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
4128 addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name;
4119 4129
4120 t->addrconf_dev[0].child = t->addrconf_vars; 4130 t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
4121 t->addrconf_conf_dir[0].child = t->addrconf_dev; 4131 t->addrconf_vars);
4122 t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
4123 t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
4124
4125 t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
4126 if (t->sysctl_header == NULL) 4132 if (t->sysctl_header == NULL)
4127 goto free_procname; 4133 goto free_procname;
4128 else
4129 p->sysctl = t;
4130 return;
4131 4134
4132 /* error path */ 4135 p->sysctl = t;
4133 free_procname: 4136 return 0;
4134 kfree(dev_name); 4137
4135 free: 4138free_procname:
4139 kfree(t->dev_name);
4140free:
4141 kfree(t);
4142out:
4143 return -ENOBUFS;
4144}
4145
4146static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
4147{
4148 struct addrconf_sysctl_table *t;
4149
4150 if (p->sysctl == NULL)
4151 return;
4152
4153 t = p->sysctl;
4154 p->sysctl = NULL;
4155 unregister_sysctl_table(t->sysctl_header);
4156 kfree(t->dev_name);
4136 kfree(t); 4157 kfree(t);
4158}
4137 4159
4138 return; 4160static void addrconf_sysctl_register(struct inet6_dev *idev)
4161{
4162 neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
4163 NET_IPV6_NEIGH, "ipv6",
4164 &ndisc_ifinfo_sysctl_change,
4165 NULL);
4166 __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name,
4167 idev->dev->ifindex, idev, &idev->cnf);
4139} 4168}
4140 4169
4141static void addrconf_sysctl_unregister(struct ipv6_devconf *p) 4170static void addrconf_sysctl_unregister(struct inet6_dev *idev)
4142{ 4171{
4143 if (p->sysctl) { 4172 __addrconf_sysctl_unregister(&idev->cnf);
4144 struct addrconf_sysctl_table *t = p->sysctl; 4173 neigh_sysctl_unregister(idev->nd_parms);
4145 p->sysctl = NULL;
4146 unregister_sysctl_table(t->sysctl_header);
4147 kfree(t->addrconf_dev[0].procname);
4148 kfree(t);
4149 }
4150} 4174}
4151 4175
4152 4176
4153#endif 4177#endif
4154 4178
4179static int addrconf_init_net(struct net *net)
4180{
4181 int err;
4182 struct ipv6_devconf *all, *dflt;
4183
4184 err = -ENOMEM;
4185 all = &ipv6_devconf;
4186 dflt = &ipv6_devconf_dflt;
4187
4188 if (net != &init_net) {
4189 all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
4190 if (all == NULL)
4191 goto err_alloc_all;
4192
4193 dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
4194 if (dflt == NULL)
4195 goto err_alloc_dflt;
4196 }
4197
4198 net->ipv6.devconf_all = all;
4199 net->ipv6.devconf_dflt = dflt;
4200
4201#ifdef CONFIG_SYSCTL
4202 err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL,
4203 NULL, all);
4204 if (err < 0)
4205 goto err_reg_all;
4206
4207 err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT,
4208 NULL, dflt);
4209 if (err < 0)
4210 goto err_reg_dflt;
4211#endif
4212 return 0;
4213
4214#ifdef CONFIG_SYSCTL
4215err_reg_dflt:
4216 __addrconf_sysctl_unregister(all);
4217err_reg_all:
4218 kfree(dflt);
4219#endif
4220err_alloc_dflt:
4221 kfree(all);
4222err_alloc_all:
4223 return err;
4224}
4225
4226static void addrconf_exit_net(struct net *net)
4227{
4228#ifdef CONFIG_SYSCTL
4229 __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
4230 __addrconf_sysctl_unregister(net->ipv6.devconf_all);
4231#endif
4232 if (net != &init_net) {
4233 kfree(net->ipv6.devconf_dflt);
4234 kfree(net->ipv6.devconf_all);
4235 }
4236}
4237
4238static struct pernet_operations addrconf_ops = {
4239 .init = addrconf_init_net,
4240 .exit = addrconf_exit_net,
4241};
4242
4155/* 4243/*
4156 * Device notifier 4244 * Device notifier
4157 */ 4245 */
@@ -4176,7 +4264,15 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
4176 4264
4177int __init addrconf_init(void) 4265int __init addrconf_init(void)
4178{ 4266{
4179 int err = 0; 4267 int err;
4268
4269 if ((err = ipv6_addr_label_init()) < 0) {
4270 printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
4271 err);
4272 return err;
4273 }
4274
4275 register_pernet_subsys(&addrconf_ops);
4180 4276
4181 /* The addrconf netdev notifier requires that loopback_dev 4277 /* The addrconf netdev notifier requires that loopback_dev
4182 * has it's ipv6 private information allocated and setup 4278 * has it's ipv6 private information allocated and setup
@@ -4201,7 +4297,7 @@ int __init addrconf_init(void)
4201 err = -ENOMEM; 4297 err = -ENOMEM;
4202 rtnl_unlock(); 4298 rtnl_unlock();
4203 if (err) 4299 if (err)
4204 return err; 4300 goto errlo;
4205 4301
4206 ip6_null_entry.u.dst.dev = init_net.loopback_dev; 4302 ip6_null_entry.u.dst.dev = init_net.loopback_dev;
4207 ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); 4303 ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
@@ -4227,20 +4323,18 @@ int __init addrconf_init(void)
4227 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); 4323 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
4228 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); 4324 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
4229 4325
4230#ifdef CONFIG_SYSCTL 4326 ipv6_addr_label_rtnl_register();
4231 addrconf_sysctl.sysctl_header =
4232 register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
4233 addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
4234#endif
4235 4327
4236 return 0; 4328 return 0;
4237errout: 4329errout:
4238 unregister_netdevice_notifier(&ipv6_dev_notf); 4330 unregister_netdevice_notifier(&ipv6_dev_notf);
4331errlo:
4332 unregister_pernet_subsys(&addrconf_ops);
4239 4333
4240 return err; 4334 return err;
4241} 4335}
4242 4336
4243void __exit addrconf_cleanup(void) 4337void addrconf_cleanup(void)
4244{ 4338{
4245 struct net_device *dev; 4339 struct net_device *dev;
4246 struct inet6_ifaddr *ifa; 4340 struct inet6_ifaddr *ifa;
@@ -4248,10 +4342,7 @@ void __exit addrconf_cleanup(void)
4248 4342
4249 unregister_netdevice_notifier(&ipv6_dev_notf); 4343 unregister_netdevice_notifier(&ipv6_dev_notf);
4250 4344
4251#ifdef CONFIG_SYSCTL 4345 unregister_pernet_subsys(&addrconf_ops);
4252 addrconf_sysctl_unregister(&ipv6_devconf_dflt);
4253 addrconf_sysctl_unregister(&ipv6_devconf);
4254#endif
4255 4346
4256 rtnl_lock(); 4347 rtnl_lock();
4257 4348
@@ -4288,8 +4379,4 @@ void __exit addrconf_cleanup(void)
4288 del_timer(&addr_chk_timer); 4379 del_timer(&addr_chk_timer);
4289 4380
4290 rtnl_unlock(); 4381 rtnl_unlock();
4291
4292#ifdef CONFIG_PROC_FS
4293 proc_net_remove(&init_net, "if_inet6");
4294#endif
4295} 4382}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
new file mode 100644
index 000000000000..a3c5a72218fd
--- /dev/null
+++ b/net/ipv6/addrlabel.c
@@ -0,0 +1,561 @@
1/*
2 * IPv6 Address Label subsystem
3 * for the IPv6 "Default" Source Address Selection
4 *
5 * Copyright (C)2007 USAGI/WIDE Project
6 */
7/*
8 * Author:
9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
10 */
11
12#include <linux/kernel.h>
13#include <linux/list.h>
14#include <linux/rcupdate.h>
15#include <linux/in6.h>
16#include <net/addrconf.h>
17#include <linux/if_addrlabel.h>
18#include <linux/netlink.h>
19#include <linux/rtnetlink.h>
20
21#if 0
22#define ADDRLABEL(x...) printk(x)
23#else
24#define ADDRLABEL(x...) do { ; } while(0)
25#endif
26
27/*
28 * Policy Table
29 */
30struct ip6addrlbl_entry
31{
32 struct in6_addr prefix;
33 int prefixlen;
34 int ifindex;
35 int addrtype;
36 u32 label;
37 struct hlist_node list;
38 atomic_t refcnt;
39 struct rcu_head rcu;
40};
41
42static struct ip6addrlbl_table
43{
44 struct hlist_head head;
45 spinlock_t lock;
46 u32 seq;
47} ip6addrlbl_table;
48
49/*
50 * Default policy table (RFC3484 + extensions)
51 *
52 * prefix addr_type label
53 * -------------------------------------------------------------------------
54 * ::1/128 LOOPBACK 0
55 * ::/0 N/A 1
56 * 2002::/16 N/A 2
57 * ::/96 COMPATv4 3
58 * ::ffff:0:0/96 V4MAPPED 4
59 * fc00::/7 N/A 5 ULA (RFC 4193)
60 * 2001::/32 N/A 6 Teredo (RFC 4380)
61 *
62 * Note: 0xffffffff is used if we do not have any policies.
63 */
64
65#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
66
67static const __initdata struct ip6addrlbl_init_table
68{
69 const struct in6_addr *prefix;
70 int prefixlen;
71 u32 label;
72} ip6addrlbl_init_table[] = {
73 { /* ::/0 */
74 .prefix = &in6addr_any,
75 .label = 1,
76 },{ /* fc00::/7 */
77 .prefix = &(struct in6_addr){{{ 0xfc }}},
78 .prefixlen = 7,
79 .label = 5,
80 },{ /* 2002::/16 */
81 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
82 .prefixlen = 16,
83 .label = 2,
84 },{ /* 2001::/32 */
85 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
86 .prefixlen = 32,
87 .label = 6,
88 },{ /* ::ffff:0:0 */
89 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
90 .prefixlen = 96,
91 .label = 4,
92 },{ /* ::/96 */
93 .prefix = &in6addr_any,
94 .prefixlen = 96,
95 .label = 3,
96 },{ /* ::1/128 */
97 .prefix = &in6addr_loopback,
98 .prefixlen = 128,
99 .label = 0,
100 }
101};
102
103/* Object management */
104static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
105{
106 kfree(p);
107}
108
109static void ip6addrlbl_free_rcu(struct rcu_head *h)
110{
111 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
112}
113
114static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
115{
116 return atomic_inc_not_zero(&p->refcnt);
117}
118
119static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
120{
121 if (atomic_dec_and_test(&p->refcnt))
122 call_rcu(&p->rcu, ip6addrlbl_free_rcu);
123}
124
125/* Find label */
126static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
127 const struct in6_addr *addr,
128 int addrtype, int ifindex)
129{
130 if (p->ifindex && p->ifindex != ifindex)
131 return 0;
132 if (p->addrtype && p->addrtype != addrtype)
133 return 0;
134 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
135 return 0;
136 return 1;
137}
138
139static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
140 int type, int ifindex)
141{
142 struct hlist_node *pos;
143 struct ip6addrlbl_entry *p;
144 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
145 if (__ip6addrlbl_match(p, addr, type, ifindex))
146 return p;
147 }
148 return NULL;
149}
150
151u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
152{
153 u32 label;
154 struct ip6addrlbl_entry *p;
155
156 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
157
158 rcu_read_lock();
159 p = __ipv6_addr_label(addr, type, ifindex);
160 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
161 rcu_read_unlock();
162
163 ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
164 __FUNCTION__,
165 NIP6(*addr), type, ifindex,
166 label);
167
168 return label;
169}
170
171/* allocate one entry */
172static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
173 int prefixlen, int ifindex,
174 u32 label)
175{
176 struct ip6addrlbl_entry *newp;
177 int addrtype;
178
179 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
180 __FUNCTION__,
181 NIP6(*prefix), prefixlen,
182 ifindex,
183 (unsigned int)label);
184
185 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
186
187 switch (addrtype) {
188 case IPV6_ADDR_MAPPED:
189 if (prefixlen > 96)
190 return ERR_PTR(-EINVAL);
191 if (prefixlen < 96)
192 addrtype = 0;
193 break;
194 case IPV6_ADDR_COMPATv4:
195 if (prefixlen != 96)
196 addrtype = 0;
197 break;
198 case IPV6_ADDR_LOOPBACK:
199 if (prefixlen != 128)
200 addrtype = 0;
201 break;
202 }
203
204 newp = kmalloc(sizeof(*newp), GFP_KERNEL);
205 if (!newp)
206 return ERR_PTR(-ENOMEM);
207
208 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
209 newp->prefixlen = prefixlen;
210 newp->ifindex = ifindex;
211 newp->addrtype = addrtype;
212 newp->label = label;
213 INIT_HLIST_NODE(&newp->list);
214 atomic_set(&newp->refcnt, 1);
215 return newp;
216}
217
218/* add a label */
219static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
220{
221 int ret = 0;
222
223 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
224 __FUNCTION__,
225 newp, replace);
226
227 if (hlist_empty(&ip6addrlbl_table.head)) {
228 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
229 } else {
230 struct hlist_node *pos, *n;
231 struct ip6addrlbl_entry *p = NULL;
232 hlist_for_each_entry_safe(p, pos, n,
233 &ip6addrlbl_table.head, list) {
234 if (p->prefixlen == newp->prefixlen &&
235 p->ifindex == newp->ifindex &&
236 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
237 if (!replace) {
238 ret = -EEXIST;
239 goto out;
240 }
241 hlist_replace_rcu(&p->list, &newp->list);
242 ip6addrlbl_put(p);
243 goto out;
244 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
245 (p->prefixlen < newp->prefixlen)) {
246 hlist_add_before_rcu(&newp->list, &p->list);
247 goto out;
248 }
249 }
250 hlist_add_after_rcu(&p->list, &newp->list);
251 }
252out:
253 if (!ret)
254 ip6addrlbl_table.seq++;
255 return ret;
256}
257
258/* add a label */
259static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
260 int ifindex, u32 label, int replace)
261{
262 struct ip6addrlbl_entry *newp;
263 int ret = 0;
264
265 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
266 __FUNCTION__,
267 NIP6(*prefix), prefixlen,
268 ifindex,
269 (unsigned int)label,
270 replace);
271
272 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
273 if (IS_ERR(newp))
274 return PTR_ERR(newp);
275 spin_lock(&ip6addrlbl_table.lock);
276 ret = __ip6addrlbl_add(newp, replace);
277 spin_unlock(&ip6addrlbl_table.lock);
278 if (ret)
279 ip6addrlbl_free(newp);
280 return ret;
281}
282
283/* remove a label */
284static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
285 int ifindex)
286{
287 struct ip6addrlbl_entry *p = NULL;
288 struct hlist_node *pos, *n;
289 int ret = -ESRCH;
290
291 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
292 __FUNCTION__,
293 NIP6(*prefix), prefixlen,
294 ifindex);
295
296 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
297 if (p->prefixlen == prefixlen &&
298 p->ifindex == ifindex &&
299 ipv6_addr_equal(&p->prefix, prefix)) {
300 hlist_del_rcu(&p->list);
301 ip6addrlbl_put(p);
302 ret = 0;
303 break;
304 }
305 }
306 return ret;
307}
308
309static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
310 int ifindex)
311{
312 struct in6_addr prefix_buf;
313 int ret;
314
315 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
316 __FUNCTION__,
317 NIP6(*prefix), prefixlen,
318 ifindex);
319
320 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
321 spin_lock(&ip6addrlbl_table.lock);
322 ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
323 spin_unlock(&ip6addrlbl_table.lock);
324 return ret;
325}
326
327/* add default label */
328static __init int ip6addrlbl_init(void)
329{
330 int err = 0;
331 int i;
332
333 ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__);
334
335 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
336 int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
337 ip6addrlbl_init_table[i].prefixlen,
338 0,
339 ip6addrlbl_init_table[i].label, 0);
340 /* XXX: should we free all rules when we catch an error? */
341 if (ret && (!err || err != -ENOMEM))
342 err = ret;
343 }
344 return err;
345}
346
347int __init ipv6_addr_label_init(void)
348{
349 spin_lock_init(&ip6addrlbl_table.lock);
350
351 return ip6addrlbl_init();
352}
353
354static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
355 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), },
356 [IFAL_LABEL] = { .len = sizeof(u32), },
357};
358
359static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
360 void *arg)
361{
362 struct net *net = skb->sk->sk_net;
363 struct ifaddrlblmsg *ifal;
364 struct nlattr *tb[IFAL_MAX+1];
365 struct in6_addr *pfx;
366 u32 label;
367 int err = 0;
368
369 if (net != &init_net)
370 return 0;
371
372 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
373 if (err < 0)
374 return err;
375
376 ifal = nlmsg_data(nlh);
377
378 if (ifal->ifal_family != AF_INET6 ||
379 ifal->ifal_prefixlen > 128)
380 return -EINVAL;
381
382 if (ifal->ifal_index &&
383 !__dev_get_by_index(&init_net, ifal->ifal_index))
384 return -EINVAL;
385
386 if (!tb[IFAL_ADDRESS])
387 return -EINVAL;
388
389 pfx = nla_data(tb[IFAL_ADDRESS]);
390 if (!pfx)
391 return -EINVAL;
392
393 if (!tb[IFAL_LABEL])
394 return -EINVAL;
395 label = nla_get_u32(tb[IFAL_LABEL]);
396 if (label == IPV6_ADDR_LABEL_DEFAULT)
397 return -EINVAL;
398
399 switch(nlh->nlmsg_type) {
400 case RTM_NEWADDRLABEL:
401 err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
402 ifal->ifal_index, label,
403 nlh->nlmsg_flags & NLM_F_REPLACE);
404 break;
405 case RTM_DELADDRLABEL:
406 err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
407 ifal->ifal_index);
408 break;
409 default:
410 err = -EOPNOTSUPP;
411 }
412 return err;
413}
414
415static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
416 int prefixlen, int ifindex, u32 lseq)
417{
418 struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
419 ifal->ifal_family = AF_INET6;
420 ifal->ifal_prefixlen = prefixlen;
421 ifal->ifal_flags = 0;
422 ifal->ifal_index = ifindex;
423 ifal->ifal_seq = lseq;
424};
425
426static int ip6addrlbl_fill(struct sk_buff *skb,
427 struct ip6addrlbl_entry *p,
428 u32 lseq,
429 u32 pid, u32 seq, int event,
430 unsigned int flags)
431{
432 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
433 sizeof(struct ifaddrlblmsg), flags);
434 if (!nlh)
435 return -EMSGSIZE;
436
437 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
438
439 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
440 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
441 nlmsg_cancel(skb, nlh);
442 return -EMSGSIZE;
443 }
444
445 return nlmsg_end(skb, nlh);
446}
447
448static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
449{
450 struct net *net = skb->sk->sk_net;
451 struct ip6addrlbl_entry *p;
452 struct hlist_node *pos;
453 int idx = 0, s_idx = cb->args[0];
454 int err;
455
456 if (net != &init_net)
457 return 0;
458
459 rcu_read_lock();
460 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
461 if (idx >= s_idx) {
462 if ((err = ip6addrlbl_fill(skb, p,
463 ip6addrlbl_table.seq,
464 NETLINK_CB(cb->skb).pid,
465 cb->nlh->nlmsg_seq,
466 RTM_NEWADDRLABEL,
467 NLM_F_MULTI)) <= 0)
468 break;
469 }
470 idx++;
471 }
472 rcu_read_unlock();
473 cb->args[0] = idx;
474 return skb->len;
475}
476
477static inline int ip6addrlbl_msgsize(void)
478{
479 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
480 + nla_total_size(16) /* IFAL_ADDRESS */
481 + nla_total_size(4) /* IFAL_LABEL */
482 );
483}
484
485static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
486 void *arg)
487{
488 struct net *net = in_skb->sk->sk_net;
489 struct ifaddrlblmsg *ifal;
490 struct nlattr *tb[IFAL_MAX+1];
491 struct in6_addr *addr;
492 u32 lseq;
493 int err = 0;
494 struct ip6addrlbl_entry *p;
495 struct sk_buff *skb;
496
497 if (net != &init_net)
498 return 0;
499
500 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
501 if (err < 0)
502 return err;
503
504 ifal = nlmsg_data(nlh);
505
506 if (ifal->ifal_family != AF_INET6 ||
507 ifal->ifal_prefixlen != 128)
508 return -EINVAL;
509
510 if (ifal->ifal_index &&
511 !__dev_get_by_index(&init_net, ifal->ifal_index))
512 return -EINVAL;
513
514 if (!tb[IFAL_ADDRESS])
515 return -EINVAL;
516
517 addr = nla_data(tb[IFAL_ADDRESS]);
518 if (!addr)
519 return -EINVAL;
520
521 rcu_read_lock();
522 p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
523 if (p && ip6addrlbl_hold(p))
524 p = NULL;
525 lseq = ip6addrlbl_table.seq;
526 rcu_read_unlock();
527
528 if (!p) {
529 err = -ESRCH;
530 goto out;
531 }
532
533 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
534 ip6addrlbl_put(p);
535 return -ENOBUFS;
536 }
537
538 err = ip6addrlbl_fill(skb, p, lseq,
539 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
540 RTM_NEWADDRLABEL, 0);
541
542 ip6addrlbl_put(p);
543
544 if (err < 0) {
545 WARN_ON(err == -EMSGSIZE);
546 kfree_skb(skb);
547 goto out;
548 }
549
550 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
551out:
552 return err;
553}
554
555void __init ipv6_addr_label_rtnl_register(void)
556{
557 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
558 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
559 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
560}
561
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1b1caf3aa1c1..bddac0e8780f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -66,9 +66,7 @@ MODULE_AUTHOR("Cast of dozens");
66MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); 66MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
67MODULE_LICENSE("GPL"); 67MODULE_LICENSE("GPL");
68 68
69int sysctl_ipv6_bindv6only __read_mostly; 69/* The inetsw6 table contains everything that inet6_create needs to
70
71/* The inetsw table contains everything that inet_create needs to
72 * build a new socket. 70 * build a new socket.
73 */ 71 */
74static struct list_head inetsw6[SOCK_MAX]; 72static struct list_head inetsw6[SOCK_MAX];
@@ -162,7 +160,7 @@ lookup_protocol:
162 BUG_TRAP(answer_prot->slab != NULL); 160 BUG_TRAP(answer_prot->slab != NULL);
163 161
164 err = -ENOBUFS; 162 err = -ENOBUFS;
165 sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); 163 sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
166 if (sk == NULL) 164 if (sk == NULL)
167 goto out; 165 goto out;
168 166
@@ -193,7 +191,7 @@ lookup_protocol:
193 np->mcast_hops = -1; 191 np->mcast_hops = -1;
194 np->mc_loop = 1; 192 np->mc_loop = 1;
195 np->pmtudisc = IPV6_PMTUDISC_WANT; 193 np->pmtudisc = IPV6_PMTUDISC_WANT;
196 np->ipv6only = sysctl_ipv6_bindv6only; 194 np->ipv6only = init_net.ipv6.sysctl.bindv6only;
197 195
198 /* Init the ipv4 part of the socket since we can have sockets 196 /* Init the ipv4 part of the socket since we can have sockets
199 * using v6 API for ipv4. 197 * using v6 API for ipv4.
@@ -280,7 +278,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
280 /* Check if the address belongs to the host. */ 278 /* Check if the address belongs to the host. */
281 if (addr_type == IPV6_ADDR_MAPPED) { 279 if (addr_type == IPV6_ADDR_MAPPED) {
282 v4addr = addr->sin6_addr.s6_addr32[3]; 280 v4addr = addr->sin6_addr.s6_addr32[3];
283 if (inet_addr_type(v4addr) != RTN_LOCAL) { 281 if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
284 err = -EADDRNOTAVAIL; 282 err = -EADDRNOTAVAIL;
285 goto out; 283 goto out;
286 } 284 }
@@ -314,7 +312,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
314 */ 312 */
315 v4addr = LOOPBACK4_IPV6; 313 v4addr = LOOPBACK4_IPV6;
316 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 314 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
317 if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { 315 if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
316 dev, 0)) {
318 if (dev) 317 if (dev)
319 dev_put(dev); 318 dev_put(dev);
320 err = -EADDRNOTAVAIL; 319 err = -EADDRNOTAVAIL;
@@ -491,6 +490,7 @@ const struct proto_ops inet6_stream_ops = {
491 .recvmsg = sock_common_recvmsg, /* ok */ 490 .recvmsg = sock_common_recvmsg, /* ok */
492 .mmap = sock_no_mmap, 491 .mmap = sock_no_mmap,
493 .sendpage = tcp_sendpage, 492 .sendpage = tcp_sendpage,
493 .splice_read = tcp_splice_read,
494#ifdef CONFIG_COMPAT 494#ifdef CONFIG_COMPAT
495 .compat_setsockopt = compat_sock_common_setsockopt, 495 .compat_setsockopt = compat_sock_common_setsockopt,
496 .compat_getsockopt = compat_sock_common_getsockopt, 496 .compat_getsockopt = compat_sock_common_getsockopt,
@@ -528,57 +528,23 @@ static struct net_proto_family inet6_family_ops = {
528 .owner = THIS_MODULE, 528 .owner = THIS_MODULE,
529}; 529};
530 530
531/* Same as inet6_dgram_ops, sans udp_poll. */ 531int inet6_register_protosw(struct inet_protosw *p)
532static const struct proto_ops inet6_sockraw_ops = {
533 .family = PF_INET6,
534 .owner = THIS_MODULE,
535 .release = inet6_release,
536 .bind = inet6_bind,
537 .connect = inet_dgram_connect, /* ok */
538 .socketpair = sock_no_socketpair, /* a do nothing */
539 .accept = sock_no_accept, /* a do nothing */
540 .getname = inet6_getname,
541 .poll = datagram_poll, /* ok */
542 .ioctl = inet6_ioctl, /* must change */
543 .listen = sock_no_listen, /* ok */
544 .shutdown = inet_shutdown, /* ok */
545 .setsockopt = sock_common_setsockopt, /* ok */
546 .getsockopt = sock_common_getsockopt, /* ok */
547 .sendmsg = inet_sendmsg, /* ok */
548 .recvmsg = sock_common_recvmsg, /* ok */
549 .mmap = sock_no_mmap,
550 .sendpage = sock_no_sendpage,
551#ifdef CONFIG_COMPAT
552 .compat_setsockopt = compat_sock_common_setsockopt,
553 .compat_getsockopt = compat_sock_common_getsockopt,
554#endif
555};
556
557static struct inet_protosw rawv6_protosw = {
558 .type = SOCK_RAW,
559 .protocol = IPPROTO_IP, /* wild card */
560 .prot = &rawv6_prot,
561 .ops = &inet6_sockraw_ops,
562 .capability = CAP_NET_RAW,
563 .no_check = UDP_CSUM_DEFAULT,
564 .flags = INET_PROTOSW_REUSE,
565};
566
567void
568inet6_register_protosw(struct inet_protosw *p)
569{ 532{
570 struct list_head *lh; 533 struct list_head *lh;
571 struct inet_protosw *answer; 534 struct inet_protosw *answer;
572 int protocol = p->protocol;
573 struct list_head *last_perm; 535 struct list_head *last_perm;
536 int protocol = p->protocol;
537 int ret;
574 538
575 spin_lock_bh(&inetsw6_lock); 539 spin_lock_bh(&inetsw6_lock);
576 540
541 ret = -EINVAL;
577 if (p->type >= SOCK_MAX) 542 if (p->type >= SOCK_MAX)
578 goto out_illegal; 543 goto out_illegal;
579 544
580 /* If we are trying to override a permanent protocol, bail. */ 545 /* If we are trying to override a permanent protocol, bail. */
581 answer = NULL; 546 answer = NULL;
547 ret = -EPERM;
582 last_perm = &inetsw6[p->type]; 548 last_perm = &inetsw6[p->type];
583 list_for_each(lh, &inetsw6[p->type]) { 549 list_for_each(lh, &inetsw6[p->type]) {
584 answer = list_entry(lh, struct inet_protosw, list); 550 answer = list_entry(lh, struct inet_protosw, list);
@@ -602,9 +568,10 @@ inet6_register_protosw(struct inet_protosw *p)
602 * system automatically returns to the old behavior. 568 * system automatically returns to the old behavior.
603 */ 569 */
604 list_add_rcu(&p->list, last_perm); 570 list_add_rcu(&p->list, last_perm);
571 ret = 0;
605out: 572out:
606 spin_unlock_bh(&inetsw6_lock); 573 spin_unlock_bh(&inetsw6_lock);
607 return; 574 return ret;
608 575
609out_permanent: 576out_permanent:
610 printk(KERN_ERR "Attempt to override permanent protocol %d.\n", 577 printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
@@ -713,20 +680,19 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
713 680
714static int __init init_ipv6_mibs(void) 681static int __init init_ipv6_mibs(void)
715{ 682{
716 if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib), 683 if (snmp_mib_init((void **)ipv6_statistics,
717 __alignof__(struct ipstats_mib)) < 0) 684 sizeof(struct ipstats_mib)) < 0)
718 goto err_ip_mib; 685 goto err_ip_mib;
719 if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), 686 if (snmp_mib_init((void **)icmpv6_statistics,
720 __alignof__(struct icmpv6_mib)) < 0) 687 sizeof(struct icmpv6_mib)) < 0)
721 goto err_icmp_mib; 688 goto err_icmp_mib;
722 if (snmp_mib_init((void **)icmpv6msg_statistics, 689 if (snmp_mib_init((void **)icmpv6msg_statistics,
723 sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0) 690 sizeof(struct icmpv6msg_mib)) < 0)
724 goto err_icmpmsg_mib; 691 goto err_icmpmsg_mib;
725 if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), 692 if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
726 __alignof__(struct udp_mib)) < 0)
727 goto err_udp_mib; 693 goto err_udp_mib;
728 if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib), 694 if (snmp_mib_init((void **)udplite_stats_in6,
729 __alignof__(struct udp_mib)) < 0) 695 sizeof (struct udp_mib)) < 0)
730 goto err_udplite_mib; 696 goto err_udplite_mib;
731 return 0; 697 return 0;
732 698
@@ -752,6 +718,32 @@ static void cleanup_ipv6_mibs(void)
752 snmp_mib_free((void **)udplite_stats_in6); 718 snmp_mib_free((void **)udplite_stats_in6);
753} 719}
754 720
721static int inet6_net_init(struct net *net)
722{
723 net->ipv6.sysctl.bindv6only = 0;
724 net->ipv6.sysctl.flush_delay = 0;
725 net->ipv6.sysctl.ip6_rt_max_size = 4096;
726 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
727 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
728 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
729 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
730 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
731 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
732 net->ipv6.sysctl.icmpv6_time = 1*HZ;
733
734 return 0;
735}
736
737static void inet6_net_exit(struct net *net)
738{
739 return;
740}
741
742static struct pernet_operations inet6_net_ops = {
743 .init = inet6_net_init,
744 .exit = inet6_net_exit,
745};
746
755static int __init inet6_init(void) 747static int __init inet6_init(void)
756{ 748{
757 struct sk_buff *dummy_skb; 749 struct sk_buff *dummy_skb;
@@ -768,7 +760,6 @@ static int __init inet6_init(void)
768 __this_module.can_unload = &ipv6_unload; 760 __this_module.can_unload = &ipv6_unload;
769#endif 761#endif
770#endif 762#endif
771
772 err = proto_register(&tcpv6_prot, 1); 763 err = proto_register(&tcpv6_prot, 1);
773 if (err) 764 if (err)
774 goto out; 765 goto out;
@@ -793,14 +784,16 @@ static int __init inet6_init(void)
793 /* We MUST register RAW sockets before we create the ICMP6, 784 /* We MUST register RAW sockets before we create the ICMP6,
794 * IGMP6, or NDISC control sockets. 785 * IGMP6, or NDISC control sockets.
795 */ 786 */
796 inet6_register_protosw(&rawv6_protosw); 787 err = rawv6_init();
788 if (err)
789 goto out_unregister_raw_proto;
797 790
798 /* Register the family here so that the init calls below will 791 /* Register the family here so that the init calls below will
799 * be able to create sockets. (?? is this dangerous ??) 792 * be able to create sockets. (?? is this dangerous ??)
800 */ 793 */
801 err = sock_register(&inet6_family_ops); 794 err = sock_register(&inet6_family_ops);
802 if (err) 795 if (err)
803 goto out_unregister_raw_proto; 796 goto out_sock_register_fail;
804 797
805 /* Initialise ipv6 mibs */ 798 /* Initialise ipv6 mibs */
806 err = init_ipv6_mibs(); 799 err = init_ipv6_mibs();
@@ -814,8 +807,14 @@ static int __init inet6_init(void)
814 * able to communicate via both network protocols. 807 * able to communicate via both network protocols.
815 */ 808 */
816 809
810 err = register_pernet_subsys(&inet6_net_ops);
811 if (err)
812 goto register_pernet_fail;
813
817#ifdef CONFIG_SYSCTL 814#ifdef CONFIG_SYSCTL
818 ipv6_sysctl_register(); 815 err = ipv6_sysctl_register();
816 if (err)
817 goto sysctl_fail;
819#endif 818#endif
820 err = icmpv6_init(&inet6_family_ops); 819 err = icmpv6_init(&inet6_family_ops);
821 if (err) 820 if (err)
@@ -848,31 +847,61 @@ static int __init inet6_init(void)
848 if (if6_proc_init()) 847 if (if6_proc_init())
849 goto proc_if6_fail; 848 goto proc_if6_fail;
850#endif 849#endif
851 ip6_route_init(); 850 err = ip6_route_init();
852 ip6_flowlabel_init(); 851 if (err)
852 goto ip6_route_fail;
853 err = ip6_flowlabel_init();
854 if (err)
855 goto ip6_flowlabel_fail;
853 err = addrconf_init(); 856 err = addrconf_init();
854 if (err) 857 if (err)
855 goto addrconf_fail; 858 goto addrconf_fail;
856 859
857 /* Init v6 extension headers. */ 860 /* Init v6 extension headers. */
858 ipv6_rthdr_init(); 861 err = ipv6_exthdrs_init();
859 ipv6_frag_init(); 862 if (err)
860 ipv6_nodata_init(); 863 goto ipv6_exthdrs_fail;
861 ipv6_destopt_init(); 864
865 err = ipv6_frag_init();
866 if (err)
867 goto ipv6_frag_fail;
862 868
863 /* Init v6 transport protocols. */ 869 /* Init v6 transport protocols. */
864 udpv6_init(); 870 err = udpv6_init();
865 udplitev6_init(); 871 if (err)
866 tcpv6_init(); 872 goto udpv6_fail;
867 873
868 ipv6_packet_init(); 874 err = udplitev6_init();
869 err = 0; 875 if (err)
876 goto udplitev6_fail;
877
878 err = tcpv6_init();
879 if (err)
880 goto tcpv6_fail;
881
882 err = ipv6_packet_init();
883 if (err)
884 goto ipv6_packet_fail;
870out: 885out:
871 return err; 886 return err;
872 887
888ipv6_packet_fail:
889 tcpv6_exit();
890tcpv6_fail:
891 udplitev6_exit();
892udplitev6_fail:
893 udpv6_exit();
894udpv6_fail:
895 ipv6_frag_exit();
896ipv6_frag_fail:
897 ipv6_exthdrs_exit();
898ipv6_exthdrs_fail:
899 addrconf_cleanup();
873addrconf_fail: 900addrconf_fail:
874 ip6_flowlabel_cleanup(); 901 ip6_flowlabel_cleanup();
902ip6_flowlabel_fail:
875 ip6_route_cleanup(); 903 ip6_route_cleanup();
904ip6_route_fail:
876#ifdef CONFIG_PROC_FS 905#ifdef CONFIG_PROC_FS
877 if6_proc_exit(); 906 if6_proc_exit();
878proc_if6_fail: 907proc_if6_fail:
@@ -899,10 +928,16 @@ ndisc_fail:
899icmp_fail: 928icmp_fail:
900#ifdef CONFIG_SYSCTL 929#ifdef CONFIG_SYSCTL
901 ipv6_sysctl_unregister(); 930 ipv6_sysctl_unregister();
931sysctl_fail:
902#endif 932#endif
933 unregister_pernet_subsys(&inet6_net_ops);
934register_pernet_fail:
903 cleanup_ipv6_mibs(); 935 cleanup_ipv6_mibs();
904out_unregister_sock: 936out_unregister_sock:
905 sock_unregister(PF_INET6); 937 sock_unregister(PF_INET6);
938 rtnl_unregister_all(PF_INET6);
939out_sock_register_fail:
940 rawv6_exit();
906out_unregister_raw_proto: 941out_unregister_raw_proto:
907 proto_unregister(&rawv6_prot); 942 proto_unregister(&rawv6_prot);
908out_unregister_udplite_proto: 943out_unregister_udplite_proto:
@@ -922,9 +957,14 @@ static void __exit inet6_exit(void)
922 /* Disallow any further netlink messages */ 957 /* Disallow any further netlink messages */
923 rtnl_unregister_all(PF_INET6); 958 rtnl_unregister_all(PF_INET6);
924 959
960 udpv6_exit();
961 udplitev6_exit();
962 tcpv6_exit();
963
925 /* Cleanup code parts. */ 964 /* Cleanup code parts. */
926 ipv6_packet_cleanup(); 965 ipv6_packet_cleanup();
927 966 ipv6_frag_exit();
967 ipv6_exthdrs_exit();
928 addrconf_cleanup(); 968 addrconf_cleanup();
929 ip6_flowlabel_cleanup(); 969 ip6_flowlabel_cleanup();
930 ip6_route_cleanup(); 970 ip6_route_cleanup();
@@ -943,9 +983,11 @@ static void __exit inet6_exit(void)
943 igmp6_cleanup(); 983 igmp6_cleanup();
944 ndisc_cleanup(); 984 ndisc_cleanup();
945 icmpv6_cleanup(); 985 icmpv6_cleanup();
986 rawv6_exit();
946#ifdef CONFIG_SYSCTL 987#ifdef CONFIG_SYSCTL
947 ipv6_sysctl_unregister(); 988 ipv6_sysctl_unregister();
948#endif 989#endif
990 unregister_pernet_subsys(&inet6_net_ops);
949 cleanup_ipv6_mibs(); 991 cleanup_ipv6_mibs();
950 proto_unregister(&rawv6_prot); 992 proto_unregister(&rawv6_prot);
951 proto_unregister(&udplitev6_prot); 993 proto_unregister(&udplitev6_prot);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 66a9139d46e9..379c8e04c36c 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,7 +35,6 @@
35#include <net/ipv6.h> 35#include <net/ipv6.h>
36#include <net/protocol.h> 36#include <net/protocol.h>
37#include <net/xfrm.h> 37#include <net/xfrm.h>
38#include <asm/scatterlist.h>
39 38
40static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) 39static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
41{ 40{
@@ -371,6 +370,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
371 ip6h->flow_lbl[2] = 0; 370 ip6h->flow_lbl[2] = 0;
372 ip6h->hop_limit = 0; 371 ip6h->hop_limit = 0;
373 372
373 spin_lock(&x->lock);
374 { 374 {
375 u8 auth_data[MAX_AH_AUTH_LEN]; 375 u8 auth_data[MAX_AH_AUTH_LEN];
376 376
@@ -379,14 +379,15 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
379 skb_push(skb, hdr_len); 379 skb_push(skb, hdr_len);
380 err = ah_mac_digest(ahp, skb, ah->auth_data); 380 err = ah_mac_digest(ahp, skb, ah->auth_data);
381 if (err) 381 if (err)
382 goto free_out; 382 goto unlock;
383 err = -EINVAL; 383 if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
384 if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { 384 err = -EBADMSG;
385 LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
386 x->stats.integrity_failed++;
387 goto free_out;
388 }
389 } 385 }
386unlock:
387 spin_unlock(&x->lock);
388
389 if (err)
390 goto free_out;
390 391
391 skb->network_header += ah_hlen; 392 skb->network_header += ah_hlen;
392 memcpy(skb_network_header(skb), tmp_hdr, hdr_len); 393 memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
@@ -514,7 +515,7 @@ static void ah6_destroy(struct xfrm_state *x)
514 kfree(ahp); 515 kfree(ahp);
515} 516}
516 517
517static struct xfrm_type ah6_type = 518static const struct xfrm_type ah6_type =
518{ 519{
519 .description = "AH6", 520 .description = "AH6",
520 .owner = THIS_MODULE, 521 .owner = THIS_MODULE,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f915c4df9820..9c7f83fbc3a1 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
89 return -EPERM; 89 return -EPERM;
90 if (ipv6_addr_is_multicast(addr)) 90 if (ipv6_addr_is_multicast(addr))
91 return -EINVAL; 91 return -EINVAL;
92 if (ipv6_chk_addr(addr, NULL, 0)) 92 if (ipv6_chk_addr(&init_net, addr, NULL, 0))
93 return -EINVAL; 93 return -EINVAL;
94 94
95 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); 95 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -504,6 +504,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
504} 504}
505 505
506static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) 506static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
507 __acquires(dev_base_lock)
507{ 508{
508 read_lock(&dev_base_lock); 509 read_lock(&dev_base_lock);
509 return ac6_get_idx(seq, *pos); 510 return ac6_get_idx(seq, *pos);
@@ -518,6 +519,7 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
518} 519}
519 520
520static void ac6_seq_stop(struct seq_file *seq, void *v) 521static void ac6_seq_stop(struct seq_file *seq, void *v)
522 __releases(dev_base_lock)
521{ 523{
522 struct ac6_iter_state *state = ac6_seq_private(seq); 524 struct ac6_iter_state *state = ac6_seq_private(seq);
523 if (likely(state->idev != NULL)) { 525 if (likely(state->idev != NULL)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ed689ac449e..94fa6ae77cfe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -123,11 +123,11 @@ ipv4_connected:
123 goto out; 123 goto out;
124 } 124 }
125 sk->sk_bound_dev_if = usin->sin6_scope_id; 125 sk->sk_bound_dev_if = usin->sin6_scope_id;
126 if (!sk->sk_bound_dev_if &&
127 (addr_type & IPV6_ADDR_MULTICAST))
128 fl.oif = np->mcast_oif;
129 } 126 }
130 127
128 if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
129 sk->sk_bound_dev_if = np->mcast_oif;
130
131 /* Connect to link-local address requires an interface */ 131 /* Connect to link-local address requires an interface */
132 if (!sk->sk_bound_dev_if) { 132 if (!sk->sk_bound_dev_if) {
133 err = -EINVAL; 133 err = -EINVAL;
@@ -177,7 +177,7 @@ ipv4_connected:
177 if (final_p) 177 if (final_p)
178 ipv6_addr_copy(&fl.fl6_dst, final_p); 178 ipv6_addr_copy(&fl.fl6_dst, final_p);
179 179
180 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 180 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
181 if (err == -EREMOTE) 181 if (err == -EREMOTE)
182 err = ip6_dst_blackhole(sk, &dst, &fl); 182 err = ip6_dst_blackhole(sk, &dst, &fl);
183 if (err < 0) 183 if (err < 0)
@@ -549,7 +549,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
549 return -ENODEV; 549 return -ENODEV;
550 } 550 }
551 } 551 }
552 if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) { 552 if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
553 dev, 0)) {
553 if (dev) 554 if (dev)
554 dev_put(dev); 555 dev_put(dev);
555 err = -EINVAL; 556 err = -EINVAL;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ab17b5e62355..8e0f1428c716 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -24,33 +24,124 @@
24 * This file is derived from net/ipv4/esp.c 24 * This file is derived from net/ipv4/esp.c
25 */ 25 */
26 26
27#include <crypto/aead.h>
28#include <crypto/authenc.h>
27#include <linux/err.h> 29#include <linux/err.h>
28#include <linux/module.h> 30#include <linux/module.h>
29#include <net/ip.h> 31#include <net/ip.h>
30#include <net/xfrm.h> 32#include <net/xfrm.h>
31#include <net/esp.h> 33#include <net/esp.h>
32#include <linux/scatterlist.h> 34#include <linux/scatterlist.h>
33#include <linux/crypto.h>
34#include <linux/kernel.h> 35#include <linux/kernel.h>
35#include <linux/pfkeyv2.h> 36#include <linux/pfkeyv2.h>
36#include <linux/random.h> 37#include <linux/random.h>
38#include <linux/slab.h>
37#include <linux/spinlock.h> 39#include <linux/spinlock.h>
38#include <net/icmp.h> 40#include <net/icmp.h>
39#include <net/ipv6.h> 41#include <net/ipv6.h>
40#include <net/protocol.h> 42#include <net/protocol.h>
41#include <linux/icmpv6.h> 43#include <linux/icmpv6.h>
42 44
45struct esp_skb_cb {
46 struct xfrm_skb_cb xfrm;
47 void *tmp;
48};
49
50#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
51
52/*
53 * Allocate an AEAD request structure with extra space for SG and IV.
54 *
55 * For alignment considerations the IV is placed at the front, followed
56 * by the request and finally the SG list.
57 *
58 * TODO: Use spare space in skb for this where possible.
59 */
60static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
61{
62 unsigned int len;
63
64 len = crypto_aead_ivsize(aead);
65 if (len) {
66 len += crypto_aead_alignmask(aead) &
67 ~(crypto_tfm_ctx_alignment() - 1);
68 len = ALIGN(len, crypto_tfm_ctx_alignment());
69 }
70
71 len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
72 len = ALIGN(len, __alignof__(struct scatterlist));
73
74 len += sizeof(struct scatterlist) * nfrags;
75
76 return kmalloc(len, GFP_ATOMIC);
77}
78
79static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
80{
81 return crypto_aead_ivsize(aead) ?
82 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
83}
84
85static inline struct aead_givcrypt_request *esp_tmp_givreq(
86 struct crypto_aead *aead, u8 *iv)
87{
88 struct aead_givcrypt_request *req;
89
90 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
91 crypto_tfm_ctx_alignment());
92 aead_givcrypt_set_tfm(req, aead);
93 return req;
94}
95
96static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
97{
98 struct aead_request *req;
99
100 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
101 crypto_tfm_ctx_alignment());
102 aead_request_set_tfm(req, aead);
103 return req;
104}
105
106static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
107 struct aead_request *req)
108{
109 return (void *)ALIGN((unsigned long)(req + 1) +
110 crypto_aead_reqsize(aead),
111 __alignof__(struct scatterlist));
112}
113
114static inline struct scatterlist *esp_givreq_sg(
115 struct crypto_aead *aead, struct aead_givcrypt_request *req)
116{
117 return (void *)ALIGN((unsigned long)(req + 1) +
118 crypto_aead_reqsize(aead),
119 __alignof__(struct scatterlist));
120}
121
122static void esp_output_done(struct crypto_async_request *base, int err)
123{
124 struct sk_buff *skb = base->data;
125
126 kfree(ESP_SKB_CB(skb)->tmp);
127 xfrm_output_resume(skb, err);
128}
129
43static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) 130static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
44{ 131{
45 int err; 132 int err;
46 struct ip_esp_hdr *esph; 133 struct ip_esp_hdr *esph;
47 struct crypto_blkcipher *tfm; 134 struct crypto_aead *aead;
48 struct blkcipher_desc desc; 135 struct aead_givcrypt_request *req;
136 struct scatterlist *sg;
137 struct scatterlist *asg;
49 struct sk_buff *trailer; 138 struct sk_buff *trailer;
139 void *tmp;
50 int blksize; 140 int blksize;
51 int clen; 141 int clen;
52 int alen; 142 int alen;
53 int nfrags; 143 int nfrags;
144 u8 *iv;
54 u8 *tail; 145 u8 *tail;
55 struct esp_data *esp = x->data; 146 struct esp_data *esp = x->data;
56 147
@@ -60,18 +151,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
60 /* Round to block size */ 151 /* Round to block size */
61 clen = skb->len; 152 clen = skb->len;
62 153
63 alen = esp->auth.icv_trunc_len; 154 aead = esp->aead;
64 tfm = esp->conf.tfm; 155 alen = crypto_aead_authsize(aead);
65 desc.tfm = tfm; 156
66 desc.flags = 0; 157 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
67 blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
68 clen = ALIGN(clen + 2, blksize); 158 clen = ALIGN(clen + 2, blksize);
69 if (esp->conf.padlen) 159 if (esp->padlen)
70 clen = ALIGN(clen, esp->conf.padlen); 160 clen = ALIGN(clen, esp->padlen);
71 161
72 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { 162 if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
73 goto error; 163 goto error;
74 } 164 nfrags = err;
165
166 tmp = esp_alloc_tmp(aead, nfrags + 1);
167 if (!tmp)
168 goto error;
169
170 iv = esp_tmp_iv(aead, tmp);
171 req = esp_tmp_givreq(aead, iv);
172 asg = esp_givreq_sg(aead, req);
173 sg = asg + 1;
75 174
76 /* Fill padding... */ 175 /* Fill padding... */
77 tail = skb_tail_pointer(trailer); 176 tail = skb_tail_pointer(trailer);
@@ -81,160 +180,154 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
81 tail[i] = i + 1; 180 tail[i] = i + 1;
82 } while (0); 181 } while (0);
83 tail[clen-skb->len - 2] = (clen - skb->len) - 2; 182 tail[clen-skb->len - 2] = (clen - skb->len) - 2;
84 pskb_put(skb, trailer, clen - skb->len); 183 tail[clen - skb->len - 1] = *skb_mac_header(skb);
184 pskb_put(skb, trailer, clen - skb->len + alen);
85 185
86 skb_push(skb, -skb_network_offset(skb)); 186 skb_push(skb, -skb_network_offset(skb));
87 esph = ip_esp_hdr(skb); 187 esph = ip_esp_hdr(skb);
88 *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb);
89 *skb_mac_header(skb) = IPPROTO_ESP; 188 *skb_mac_header(skb) = IPPROTO_ESP;
90 189
91 esph->spi = x->id.spi; 190 esph->spi = x->id.spi;
92 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); 191 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
93 192
94 spin_lock_bh(&x->lock); 193 sg_init_table(sg, nfrags);
194 skb_to_sgvec(skb, sg,
195 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
196 clen + alen);
197 sg_init_one(asg, esph, sizeof(*esph));
95 198
96 if (esp->conf.ivlen) { 199 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
97 if (unlikely(!esp->conf.ivinitted)) { 200 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
98 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 201 aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
99 esp->conf.ivinitted = 1; 202 aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
100 }
101 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
102 }
103 203
104 do { 204 ESP_SKB_CB(skb)->tmp = tmp;
105 struct scatterlist *sg = &esp->sgbuf[0]; 205 err = crypto_aead_givencrypt(req);
206 if (err == -EINPROGRESS)
207 goto error;
106 208
107 if (unlikely(nfrags > ESP_NUM_FAST_SG)) { 209 if (err == -EBUSY)
108 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); 210 err = NET_XMIT_DROP;
109 if (!sg) 211
110 goto unlock; 212 kfree(tmp);
111 } 213
112 sg_init_table(sg, nfrags); 214error:
113 sg_mark_end(sg, skb_to_sgvec(skb, sg, esph->enc_data + 215 return err;
114 esp->conf.ivlen - 216}
115 skb->data, clen)); 217
116 err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); 218static int esp_input_done2(struct sk_buff *skb, int err)
117 if (unlikely(sg != &esp->sgbuf[0])) 219{
118 kfree(sg); 220 struct xfrm_state *x = xfrm_input_state(skb);
119 } while (0); 221 struct esp_data *esp = x->data;
222 struct crypto_aead *aead = esp->aead;
223 int alen = crypto_aead_authsize(aead);
224 int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
225 int elen = skb->len - hlen;
226 int hdr_len = skb_network_header_len(skb);
227 int padlen;
228 u8 nexthdr[2];
229
230 kfree(ESP_SKB_CB(skb)->tmp);
120 231
121 if (unlikely(err)) 232 if (unlikely(err))
122 goto unlock; 233 goto out;
123 234
124 if (esp->conf.ivlen) { 235 if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
125 memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); 236 BUG();
126 crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
127 }
128 237
129 if (esp->auth.icv_full_len) { 238 err = -EINVAL;
130 err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, 239 padlen = nexthdr[0];
131 sizeof(*esph) + esp->conf.ivlen + clen); 240 if (padlen + 2 + alen >= elen) {
132 memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); 241 LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage "
242 "padlen=%d, elen=%d\n", padlen + 2, elen - alen);
243 goto out;
133 } 244 }
134 245
135unlock: 246 /* ... check padding bits here. Silly. :-) */
136 spin_unlock_bh(&x->lock);
137 247
138error: 248 pskb_trim(skb, skb->len - alen - padlen - 2);
249 __skb_pull(skb, hlen);
250 skb_set_transport_header(skb, -hdr_len);
251
252 err = nexthdr[1];
253
254 /* RFC4303: Drop dummy packets without any error */
255 if (err == IPPROTO_NONE)
256 err = -EINVAL;
257
258out:
139 return err; 259 return err;
140} 260}
141 261
262static void esp_input_done(struct crypto_async_request *base, int err)
263{
264 struct sk_buff *skb = base->data;
265
266 xfrm_input_resume(skb, esp_input_done2(skb, err));
267}
268
142static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) 269static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
143{ 270{
144 struct ipv6hdr *iph;
145 struct ip_esp_hdr *esph; 271 struct ip_esp_hdr *esph;
146 struct esp_data *esp = x->data; 272 struct esp_data *esp = x->data;
147 struct crypto_blkcipher *tfm = esp->conf.tfm; 273 struct crypto_aead *aead = esp->aead;
148 struct blkcipher_desc desc = { .tfm = tfm }; 274 struct aead_request *req;
149 struct sk_buff *trailer; 275 struct sk_buff *trailer;
150 int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); 276 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
151 int alen = esp->auth.icv_trunc_len;
152 int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen;
153 int hdr_len = skb_network_header_len(skb);
154 int nfrags; 277 int nfrags;
155 int ret = 0; 278 int ret = 0;
279 void *tmp;
280 u8 *iv;
281 struct scatterlist *sg;
282 struct scatterlist *asg;
156 283
157 if (!pskb_may_pull(skb, sizeof(*esph))) { 284 if (!pskb_may_pull(skb, sizeof(*esph))) {
158 ret = -EINVAL; 285 ret = -EINVAL;
159 goto out; 286 goto out;
160 } 287 }
161 288
162 if (elen <= 0 || (elen & (blksize-1))) { 289 if (elen <= 0) {
163 ret = -EINVAL; 290 ret = -EINVAL;
164 goto out; 291 goto out;
165 } 292 }
166 293
167 /* If integrity check is required, do this. */
168 if (esp->auth.icv_full_len) {
169 u8 sum[alen];
170
171 ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
172 if (ret)
173 goto out;
174
175 if (skb_copy_bits(skb, skb->len - alen, sum, alen))
176 BUG();
177
178 if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
179 x->stats.integrity_failed++;
180 ret = -EINVAL;
181 goto out;
182 }
183 }
184
185 if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { 294 if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
186 ret = -EINVAL; 295 ret = -EINVAL;
187 goto out; 296 goto out;
188 } 297 }
189 298
299 ret = -ENOMEM;
300 tmp = esp_alloc_tmp(aead, nfrags + 1);
301 if (!tmp)
302 goto out;
303
304 ESP_SKB_CB(skb)->tmp = tmp;
305 iv = esp_tmp_iv(aead, tmp);
306 req = esp_tmp_req(aead, iv);
307 asg = esp_req_sg(aead, req);
308 sg = asg + 1;
309
190 skb->ip_summed = CHECKSUM_NONE; 310 skb->ip_summed = CHECKSUM_NONE;
191 311
192 esph = (struct ip_esp_hdr *)skb->data; 312 esph = (struct ip_esp_hdr *)skb->data;
193 iph = ipv6_hdr(skb);
194 313
195 /* Get ivec. This can be wrong, check against another impls. */ 314 /* Get ivec. This can be wrong, check against another impls. */
196 if (esp->conf.ivlen) 315 iv = esph->enc_data;
197 crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
198
199 {
200 u8 nexthdr[2];
201 struct scatterlist *sg = &esp->sgbuf[0];
202 u8 padlen;
203
204 if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
205 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
206 if (!sg) {
207 ret = -ENOMEM;
208 goto out;
209 }
210 }
211 sg_init_table(sg, nfrags);
212 sg_mark_end(sg, skb_to_sgvec(skb, sg,
213 sizeof(*esph) + esp->conf.ivlen,
214 elen));
215 ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
216 if (unlikely(sg != &esp->sgbuf[0]))
217 kfree(sg);
218 if (unlikely(ret))
219 goto out;
220
221 if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
222 BUG();
223
224 padlen = nexthdr[0];
225 if (padlen+2 >= elen) {
226 LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
227 ret = -EINVAL;
228 goto out;
229 }
230 /* ... check padding bits here. Silly. :-) */
231 316
232 pskb_trim(skb, skb->len - alen - padlen - 2); 317 sg_init_table(sg, nfrags);
233 ret = nexthdr[1]; 318 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
234 } 319 sg_init_one(asg, esph, sizeof(*esph));
320
321 aead_request_set_callback(req, 0, esp_input_done, skb);
322 aead_request_set_crypt(req, sg, sg, elen, iv);
323 aead_request_set_assoc(req, asg, sizeof(*esph));
324
325 ret = crypto_aead_decrypt(req);
326 if (ret == -EINPROGRESS)
327 goto out;
328
329 ret = esp_input_done2(skb, ret);
235 330
236 __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
237 skb_set_transport_header(skb, -hdr_len);
238out: 331out:
239 return ret; 332 return ret;
240} 333}
@@ -242,11 +335,11 @@ out:
242static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) 335static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
243{ 336{
244 struct esp_data *esp = x->data; 337 struct esp_data *esp = x->data;
245 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 338 u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
246 u32 align = max_t(u32, blksize, esp->conf.padlen); 339 u32 align = max_t(u32, blksize, esp->padlen);
247 u32 rem; 340 u32 rem;
248 341
249 mtu -= x->props.header_len + esp->auth.icv_trunc_len; 342 mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
250 rem = mtu & (align - 1); 343 rem = mtu & (align - 1);
251 mtu &= ~(align - 1); 344 mtu &= ~(align - 1);
252 345
@@ -285,81 +378,146 @@ static void esp6_destroy(struct xfrm_state *x)
285 if (!esp) 378 if (!esp)
286 return; 379 return;
287 380
288 crypto_free_blkcipher(esp->conf.tfm); 381 crypto_free_aead(esp->aead);
289 esp->conf.tfm = NULL;
290 kfree(esp->conf.ivec);
291 esp->conf.ivec = NULL;
292 crypto_free_hash(esp->auth.tfm);
293 esp->auth.tfm = NULL;
294 kfree(esp->auth.work_icv);
295 esp->auth.work_icv = NULL;
296 kfree(esp); 382 kfree(esp);
297} 383}
298 384
299static int esp6_init_state(struct xfrm_state *x) 385static int esp_init_aead(struct xfrm_state *x)
300{ 386{
301 struct esp_data *esp = NULL; 387 struct esp_data *esp = x->data;
302 struct crypto_blkcipher *tfm; 388 struct crypto_aead *aead;
389 int err;
390
391 aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
392 err = PTR_ERR(aead);
393 if (IS_ERR(aead))
394 goto error;
395
396 esp->aead = aead;
303 397
398 err = crypto_aead_setkey(aead, x->aead->alg_key,
399 (x->aead->alg_key_len + 7) / 8);
400 if (err)
401 goto error;
402
403 err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
404 if (err)
405 goto error;
406
407error:
408 return err;
409}
410
411static int esp_init_authenc(struct xfrm_state *x)
412{
413 struct esp_data *esp = x->data;
414 struct crypto_aead *aead;
415 struct crypto_authenc_key_param *param;
416 struct rtattr *rta;
417 char *key;
418 char *p;
419 char authenc_name[CRYPTO_MAX_ALG_NAME];
420 unsigned int keylen;
421 int err;
422
423 err = -EINVAL;
304 if (x->ealg == NULL) 424 if (x->ealg == NULL)
305 goto error; 425 goto error;
306 426
307 if (x->encap) 427 err = -ENAMETOOLONG;
428 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
429 x->aalg ? x->aalg->alg_name : "digest_null",
430 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
308 goto error; 431 goto error;
309 432
310 esp = kzalloc(sizeof(*esp), GFP_KERNEL); 433 aead = crypto_alloc_aead(authenc_name, 0, 0);
311 if (esp == NULL) 434 err = PTR_ERR(aead);
312 return -ENOMEM; 435 if (IS_ERR(aead))
436 goto error;
437
438 esp->aead = aead;
439
440 keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
441 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
442 err = -ENOMEM;
443 key = kmalloc(keylen, GFP_KERNEL);
444 if (!key)
445 goto error;
446
447 p = key;
448 rta = (void *)p;
449 rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
450 rta->rta_len = RTA_LENGTH(sizeof(*param));
451 param = RTA_DATA(rta);
452 p += RTA_SPACE(sizeof(*param));
313 453
314 if (x->aalg) { 454 if (x->aalg) {
315 struct xfrm_algo_desc *aalg_desc; 455 struct xfrm_algo_desc *aalg_desc;
316 struct crypto_hash *hash;
317
318 hash = crypto_alloc_hash(x->aalg->alg_name, 0,
319 CRYPTO_ALG_ASYNC);
320 if (IS_ERR(hash))
321 goto error;
322 456
323 esp->auth.tfm = hash; 457 memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
324 if (crypto_hash_setkey(hash, x->aalg->alg_key, 458 p += (x->aalg->alg_key_len + 7) / 8;
325 (x->aalg->alg_key_len + 7) / 8))
326 goto error;
327 459
328 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 460 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
329 BUG_ON(!aalg_desc); 461 BUG_ON(!aalg_desc);
330 462
463 err = -EINVAL;
331 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 464 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
332 crypto_hash_digestsize(hash)) { 465 crypto_aead_authsize(aead)) {
333 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", 466 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
334 x->aalg->alg_name, 467 x->aalg->alg_name,
335 crypto_hash_digestsize(hash), 468 crypto_aead_authsize(aead),
336 aalg_desc->uinfo.auth.icv_fullbits/8); 469 aalg_desc->uinfo.auth.icv_fullbits/8);
337 goto error; 470 goto free_key;
338 } 471 }
339 472
340 esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; 473 err = crypto_aead_setauthsize(
341 esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; 474 aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
342 475 if (err)
343 esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); 476 goto free_key;
344 if (!esp->auth.work_icv)
345 goto error;
346 }
347 tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
348 if (IS_ERR(tfm))
349 goto error;
350 esp->conf.tfm = tfm;
351 esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
352 esp->conf.padlen = 0;
353 if (esp->conf.ivlen) {
354 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
355 if (unlikely(esp->conf.ivec == NULL))
356 goto error;
357 esp->conf.ivinitted = 0;
358 } 477 }
359 if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, 478
360 (x->ealg->alg_key_len + 7) / 8)) 479 param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
480 memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
481
482 err = crypto_aead_setkey(aead, key, keylen);
483
484free_key:
485 kfree(key);
486
487error:
488 return err;
489}
490
491static int esp6_init_state(struct xfrm_state *x)
492{
493 struct esp_data *esp;
494 struct crypto_aead *aead;
495 u32 align;
496 int err;
497
498 if (x->encap)
499 return -EINVAL;
500
501 esp = kzalloc(sizeof(*esp), GFP_KERNEL);
502 if (esp == NULL)
503 return -ENOMEM;
504
505 x->data = esp;
506
507 if (x->aead)
508 err = esp_init_aead(x);
509 else
510 err = esp_init_authenc(x);
511
512 if (err)
361 goto error; 513 goto error;
362 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 514
515 aead = esp->aead;
516
517 esp->padlen = 0;
518
519 x->props.header_len = sizeof(struct ip_esp_hdr) +
520 crypto_aead_ivsize(aead);
363 switch (x->props.mode) { 521 switch (x->props.mode) {
364 case XFRM_MODE_BEET: 522 case XFRM_MODE_BEET:
365 case XFRM_MODE_TRANSPORT: 523 case XFRM_MODE_TRANSPORT:
@@ -370,17 +528,17 @@ static int esp6_init_state(struct xfrm_state *x)
370 default: 528 default:
371 goto error; 529 goto error;
372 } 530 }
373 x->data = esp; 531
374 return 0; 532 align = ALIGN(crypto_aead_blocksize(aead), 4);
533 if (esp->padlen)
534 align = max_t(u32, align, esp->padlen);
535 x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
375 536
376error: 537error:
377 x->data = esp; 538 return err;
378 esp6_destroy(x);
379 x->data = NULL;
380 return -EINVAL;
381} 539}
382 540
383static struct xfrm_type esp6_type = 541static const struct xfrm_type esp6_type =
384{ 542{
385 .description = "ESP6", 543 .description = "ESP6",
386 .owner = THIS_MODULE, 544 .owner = THIS_MODULE,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1e89efd38a0c..3cd1c993d52b 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -32,6 +32,7 @@
32#include <linux/in6.h> 32#include <linux/in6.h>
33#include <linux/icmpv6.h> 33#include <linux/icmpv6.h>
34 34
35#include <net/dst.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/snmp.h> 37#include <net/snmp.h>
37 38
@@ -307,38 +308,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
307 return -1; 308 return -1;
308} 309}
309 310
310static struct inet6_protocol destopt_protocol = {
311 .handler = ipv6_destopt_rcv,
312 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
313};
314
315void __init ipv6_destopt_init(void)
316{
317 if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
318 printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
319}
320
321/********************************
322 NONE header. No data in packet.
323 ********************************/
324
325static int ipv6_nodata_rcv(struct sk_buff *skb)
326{
327 kfree_skb(skb);
328 return 0;
329}
330
331static struct inet6_protocol nodata_protocol = {
332 .handler = ipv6_nodata_rcv,
333 .flags = INET6_PROTO_NOPOLICY,
334};
335
336void __init ipv6_nodata_init(void)
337{
338 if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
339 printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
340}
341
342/******************************** 311/********************************
343 Routing header. 312 Routing header.
344 ********************************/ 313 ********************************/
@@ -476,7 +445,7 @@ looped_back:
476 kfree_skb(skb); 445 kfree_skb(skb);
477 return -1; 446 return -1;
478 } 447 }
479 if (!ipv6_chk_home_addr(addr)) { 448 if (!ipv6_chk_home_addr(&init_net, addr)) {
480 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), 449 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
481 IPSTATS_MIB_INADDRERRORS); 450 IPSTATS_MIB_INADDRERRORS);
482 kfree_skb(skb); 451 kfree_skb(skb);
@@ -536,12 +505,48 @@ static struct inet6_protocol rthdr_protocol = {
536 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, 505 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
537}; 506};
538 507
539void __init ipv6_rthdr_init(void) 508static struct inet6_protocol destopt_protocol = {
509 .handler = ipv6_destopt_rcv,
510 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
511};
512
513static struct inet6_protocol nodata_protocol = {
514 .handler = dst_discard,
515 .flags = INET6_PROTO_NOPOLICY,
516};
517
518int __init ipv6_exthdrs_init(void)
540{ 519{
541 if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0) 520 int ret;
542 printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); 521
522 ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
523 if (ret)
524 goto out;
525
526 ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
527 if (ret)
528 goto out_rthdr;
529
530 ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
531 if (ret)
532 goto out_destopt;
533
534out:
535 return ret;
536out_rthdr:
537 inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
538out_destopt:
539 inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
540 goto out;
543}; 541};
544 542
543void ipv6_exthdrs_exit(void)
544{
545 inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
546 inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
547 inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
548}
549
545/********************************** 550/**********************************
546 Hop-by-hop options. 551 Hop-by-hop options.
547 **********************************/ 552 **********************************/
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 706622af206f..695c0ca8a417 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -31,25 +31,6 @@ struct fib6_rule
31 31
32static struct fib_rules_ops fib6_rules_ops; 32static struct fib_rules_ops fib6_rules_ops;
33 33
34static struct fib6_rule main_rule = {
35 .common = {
36 .refcnt = ATOMIC_INIT(2),
37 .pref = 0x7FFE,
38 .action = FR_ACT_TO_TBL,
39 .table = RT6_TABLE_MAIN,
40 },
41};
42
43static struct fib6_rule local_rule = {
44 .common = {
45 .refcnt = ATOMIC_INIT(2),
46 .pref = 0,
47 .action = FR_ACT_TO_TBL,
48 .table = RT6_TABLE_LOCAL,
49 .flags = FIB_RULE_PERMANENT,
50 },
51};
52
53struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, 34struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
54 pol_lookup_t lookup) 35 pol_lookup_t lookup)
55{ 36{
@@ -242,7 +223,7 @@ nla_put_failure:
242 return -ENOBUFS; 223 return -ENOBUFS;
243} 224}
244 225
245static u32 fib6_rule_default_pref(void) 226static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
246{ 227{
247 return 0x3FFF; 228 return 0x3FFF;
248} 229}
@@ -268,14 +249,40 @@ static struct fib_rules_ops fib6_rules_ops = {
268 .policy = fib6_rule_policy, 249 .policy = fib6_rule_policy,
269 .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), 250 .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list),
270 .owner = THIS_MODULE, 251 .owner = THIS_MODULE,
252 .fro_net = &init_net,
271}; 253};
272 254
273void __init fib6_rules_init(void) 255static int __init fib6_default_rules_init(void)
274{ 256{
275 list_add_tail(&local_rule.common.list, &fib6_rules_ops.rules_list); 257 int err;
276 list_add_tail(&main_rule.common.list, &fib6_rules_ops.rules_list); 258
259 err = fib_default_rule_add(&fib6_rules_ops, 0,
260 RT6_TABLE_LOCAL, FIB_RULE_PERMANENT);
261 if (err < 0)
262 return err;
263 err = fib_default_rule_add(&fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0);
264 if (err < 0)
265 return err;
266 return 0;
267}
277 268
278 fib_rules_register(&fib6_rules_ops); 269int __init fib6_rules_init(void)
270{
271 int ret;
272
273 ret = fib6_default_rules_init();
274 if (ret)
275 goto out;
276
277 ret = fib_rules_register(&fib6_rules_ops);
278 if (ret)
279 goto out_default_rules_init;
280out:
281 return ret;
282
283out_default_rules_init:
284 fib_rules_cleanup_ops(&fib6_rules_ops);
285 goto out;
279} 286}
280 287
281void fib6_rules_cleanup(void) 288void fib6_rules_cleanup(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9bb031fa1c2f..cbb5b9cf84ad 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -63,6 +63,7 @@
63#include <net/ip6_route.h> 63#include <net/ip6_route.h>
64#include <net/addrconf.h> 64#include <net/addrconf.h>
65#include <net/icmp.h> 65#include <net/icmp.h>
66#include <net/xfrm.h>
66 67
67#include <asm/uaccess.h> 68#include <asm/uaccess.h>
68#include <asm/system.h> 69#include <asm/system.h>
@@ -86,7 +87,7 @@ static int icmpv6_rcv(struct sk_buff *skb);
86 87
87static struct inet6_protocol icmpv6_protocol = { 88static struct inet6_protocol icmpv6_protocol = {
88 .handler = icmpv6_rcv, 89 .handler = icmpv6_rcv,
89 .flags = INET6_PROTO_FINAL, 90 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
90}; 91};
91 92
92static __inline__ int icmpv6_xmit_lock(void) 93static __inline__ int icmpv6_xmit_lock(void)
@@ -153,8 +154,6 @@ static int is_ineligible(struct sk_buff *skb)
153 return 0; 154 return 0;
154} 155}
155 156
156static int sysctl_icmpv6_time __read_mostly = 1*HZ;
157
158/* 157/*
159 * Check the ICMP output rate limit 158 * Check the ICMP output rate limit
160 */ 159 */
@@ -185,7 +184,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
185 res = 1; 184 res = 1;
186 } else { 185 } else {
187 struct rt6_info *rt = (struct rt6_info *)dst; 186 struct rt6_info *rt = (struct rt6_info *)dst;
188 int tmo = sysctl_icmpv6_time; 187 int tmo = init_net.ipv6.sysctl.icmpv6_time;
189 188
190 /* Give more bandwidth to wider prefixes. */ 189 /* Give more bandwidth to wider prefixes. */
191 if (rt->rt6i_dst.plen < 128) 190 if (rt->rt6i_dst.plen < 128)
@@ -310,8 +309,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
310 struct ipv6_pinfo *np; 309 struct ipv6_pinfo *np;
311 struct in6_addr *saddr = NULL; 310 struct in6_addr *saddr = NULL;
312 struct dst_entry *dst; 311 struct dst_entry *dst;
312 struct dst_entry *dst2;
313 struct icmp6hdr tmp_hdr; 313 struct icmp6hdr tmp_hdr;
314 struct flowi fl; 314 struct flowi fl;
315 struct flowi fl2;
315 struct icmpv6_msg msg; 316 struct icmpv6_msg msg;
316 int iif = 0; 317 int iif = 0;
317 int addr_type = 0; 318 int addr_type = 0;
@@ -331,7 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
331 */ 332 */
332 addr_type = ipv6_addr_type(&hdr->daddr); 333 addr_type = ipv6_addr_type(&hdr->daddr);
333 334
334 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) 335 if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
335 saddr = &hdr->daddr; 336 saddr = &hdr->daddr;
336 337
337 /* 338 /*
@@ -418,9 +419,42 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
418 goto out_dst_release; 419 goto out_dst_release;
419 } 420 }
420 421
421 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) 422 /* No need to clone since we're just using its address. */
423 dst2 = dst;
424
425 err = xfrm_lookup(&dst, &fl, sk, 0);
426 switch (err) {
427 case 0:
428 if (dst != dst2)
429 goto route_done;
430 break;
431 case -EPERM:
432 dst = NULL;
433 break;
434 default:
422 goto out; 435 goto out;
436 }
423 437
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
439 goto out;
440
441 if (ip6_dst_lookup(sk, &dst2, &fl))
442 goto out;
443
444 err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
445 if (err == -ENOENT) {
446 if (!dst)
447 goto out;
448 goto route_done;
449 }
450
451 dst_release(dst);
452 dst = dst2;
453
454 if (err)
455 goto out;
456
457route_done:
424 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 458 if (ipv6_addr_is_multicast(&fl.fl6_dst))
425 hlimit = np->mcast_hops; 459 hlimit = np->mcast_hops;
426 else 460 else
@@ -458,8 +492,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
458 } 492 }
459 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); 493 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
460 494
461 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
462
463out_put: 495out_put:
464 if (likely(idev != NULL)) 496 if (likely(idev != NULL))
465 in6_dev_put(idev); 497 in6_dev_put(idev);
@@ -557,9 +589,7 @@ out:
557 589
558static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) 590static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
559{ 591{
560 struct in6_addr *saddr, *daddr;
561 struct inet6_protocol *ipprot; 592 struct inet6_protocol *ipprot;
562 struct sock *sk;
563 int inner_offset; 593 int inner_offset;
564 int hash; 594 int hash;
565 u8 nexthdr; 595 u8 nexthdr;
@@ -581,9 +611,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
581 if (!pskb_may_pull(skb, inner_offset+8)) 611 if (!pskb_may_pull(skb, inner_offset+8))
582 return; 612 return;
583 613
584 saddr = &ipv6_hdr(skb)->saddr;
585 daddr = &ipv6_hdr(skb)->daddr;
586
587 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. 614 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
588 Without this we will not able f.e. to make source routed 615 Without this we will not able f.e. to make source routed
589 pmtu discovery. 616 pmtu discovery.
@@ -599,15 +626,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
599 ipprot->err_handler(skb, NULL, type, code, inner_offset, info); 626 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
600 rcu_read_unlock(); 627 rcu_read_unlock();
601 628
602 read_lock(&raw_v6_lock); 629 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
603 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
604 while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
605 IP6CB(skb)->iif))) {
606 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
607 sk = sk_next(sk);
608 }
609 }
610 read_unlock(&raw_v6_lock);
611} 630}
612 631
613/* 632/*
@@ -623,6 +642,25 @@ static int icmpv6_rcv(struct sk_buff *skb)
623 struct icmp6hdr *hdr; 642 struct icmp6hdr *hdr;
624 int type; 643 int type;
625 644
645 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
646 int nh;
647
648 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
649 XFRM_STATE_ICMP))
650 goto drop_no_count;
651
652 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
653 goto drop_no_count;
654
655 nh = skb_network_offset(skb);
656 skb_set_network_header(skb, sizeof(*hdr));
657
658 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
659 goto drop_no_count;
660
661 skb_set_network_header(skb, nh);
662 }
663
626 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); 664 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
627 665
628 saddr = &ipv6_hdr(skb)->saddr; 666 saddr = &ipv6_hdr(skb)->saddr;
@@ -645,8 +683,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
645 } 683 }
646 } 684 }
647 685
648 if (!pskb_pull(skb, sizeof(struct icmp6hdr))) 686 __skb_pull(skb, sizeof(*hdr));
649 goto discard_it;
650 687
651 hdr = icmp6_hdr(skb); 688 hdr = icmp6_hdr(skb);
652 689
@@ -732,6 +769,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
732 769
733discard_it: 770discard_it:
734 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); 771 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
772drop_no_count:
735 kfree_skb(skb); 773 kfree_skb(skb);
736 return 0; 774 return 0;
737} 775}
@@ -867,16 +905,26 @@ int icmpv6_err_convert(int type, int code, int *err)
867EXPORT_SYMBOL(icmpv6_err_convert); 905EXPORT_SYMBOL(icmpv6_err_convert);
868 906
869#ifdef CONFIG_SYSCTL 907#ifdef CONFIG_SYSCTL
870ctl_table ipv6_icmp_table[] = { 908ctl_table ipv6_icmp_table_template[] = {
871 { 909 {
872 .ctl_name = NET_IPV6_ICMP_RATELIMIT, 910 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
873 .procname = "ratelimit", 911 .procname = "ratelimit",
874 .data = &sysctl_icmpv6_time, 912 .data = &init_net.ipv6.sysctl.icmpv6_time,
875 .maxlen = sizeof(int), 913 .maxlen = sizeof(int),
876 .mode = 0644, 914 .mode = 0644,
877 .proc_handler = &proc_dointvec 915 .proc_handler = &proc_dointvec
878 }, 916 },
879 { .ctl_name = 0 }, 917 { .ctl_name = 0 },
880}; 918};
919
920struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
921{
922 struct ctl_table *table;
923
924 table = kmemdup(ipv6_icmp_table_template,
925 sizeof(ipv6_icmp_table_template),
926 GFP_KERNEL);
927 return table;
928}
881#endif 929#endif
882 930
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d6f1026f1943..d325a9958909 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -37,14 +37,13 @@ void __inet6_hash(struct inet_hashinfo *hashinfo,
37 } else { 37 } else {
38 unsigned int hash; 38 unsigned int hash;
39 sk->sk_hash = hash = inet6_sk_ehashfn(sk); 39 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
40 hash &= (hashinfo->ehash_size - 1); 40 list = &inet_ehash_bucket(hashinfo, hash)->chain;
41 list = &hashinfo->ehash[hash].chain; 41 lock = inet_ehash_lockp(hashinfo, hash);
42 lock = &hashinfo->ehash[hash].lock;
43 write_lock(lock); 42 write_lock(lock);
44 } 43 }
45 44
46 __sk_add_node(sk, list); 45 __sk_add_node(sk, list);
47 sock_prot_inc_use(sk->sk_prot); 46 sock_prot_inuse_add(sk->sk_prot, 1);
48 write_unlock(lock); 47 write_unlock(lock);
49} 48}
50EXPORT_SYMBOL(__inet6_hash); 49EXPORT_SYMBOL(__inet6_hash);
@@ -55,7 +54,8 @@ EXPORT_SYMBOL(__inet6_hash);
55 * 54 *
56 * The sockhash lock must be held as a reader here. 55 * The sockhash lock must be held as a reader here.
57 */ 56 */
58struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, 57struct sock *__inet6_lookup_established(struct net *net,
58 struct inet_hashinfo *hashinfo,
59 const struct in6_addr *saddr, 59 const struct in6_addr *saddr,
60 const __be16 sport, 60 const __be16 sport,
61 const struct in6_addr *daddr, 61 const struct in6_addr *daddr,
@@ -70,41 +70,33 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
70 */ 70 */
71 unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); 71 unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
72 struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); 72 struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
73 rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
73 74
74 prefetch(head->chain.first); 75 prefetch(head->chain.first);
75 read_lock(&head->lock); 76 read_lock(lock);
76 sk_for_each(sk, node, &head->chain) { 77 sk_for_each(sk, node, &head->chain) {
77 /* For IPV6 do the cheaper port and family tests first. */ 78 /* For IPV6 do the cheaper port and family tests first. */
78 if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) 79 if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif))
79 goto hit; /* You sunk my battleship! */ 80 goto hit; /* You sunk my battleship! */
80 } 81 }
81 /* Must check for a TIME_WAIT'er before going to listener hash. */ 82 /* Must check for a TIME_WAIT'er before going to listener hash. */
82 sk_for_each(sk, node, &head->twchain) { 83 sk_for_each(sk, node, &head->twchain) {
83 const struct inet_timewait_sock *tw = inet_twsk(sk); 84 if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif))
84 85 goto hit;
85 if(*((__portpair *)&(tw->tw_dport)) == ports &&
86 sk->sk_family == PF_INET6) {
87 const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
88
89 if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
90 ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
91 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
92 goto hit;
93 }
94 } 86 }
95 read_unlock(&head->lock); 87 read_unlock(lock);
96 return NULL; 88 return NULL;
97 89
98hit: 90hit:
99 sock_hold(sk); 91 sock_hold(sk);
100 read_unlock(&head->lock); 92 read_unlock(lock);
101 return sk; 93 return sk;
102} 94}
103EXPORT_SYMBOL(__inet6_lookup_established); 95EXPORT_SYMBOL(__inet6_lookup_established);
104 96
105struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, 97struct sock *inet6_lookup_listener(struct net *net,
106 const struct in6_addr *daddr, 98 struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
107 const unsigned short hnum, const int dif) 99 const unsigned short hnum, const int dif)
108{ 100{
109 struct sock *sk; 101 struct sock *sk;
110 const struct hlist_node *node; 102 const struct hlist_node *node;
@@ -113,7 +105,8 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
113 105
114 read_lock(&hashinfo->lhash_lock); 106 read_lock(&hashinfo->lhash_lock);
115 sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { 107 sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
116 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { 108 if (sk->sk_net == net && inet_sk(sk)->num == hnum &&
109 sk->sk_family == PF_INET6) {
117 const struct ipv6_pinfo *np = inet6_sk(sk); 110 const struct ipv6_pinfo *np = inet6_sk(sk);
118 111
119 score = 1; 112 score = 1;
@@ -145,7 +138,7 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
145 138
146EXPORT_SYMBOL_GPL(inet6_lookup_listener); 139EXPORT_SYMBOL_GPL(inet6_lookup_listener);
147 140
148struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, 141struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
149 const struct in6_addr *saddr, const __be16 sport, 142 const struct in6_addr *saddr, const __be16 sport,
150 const struct in6_addr *daddr, const __be16 dport, 143 const struct in6_addr *daddr, const __be16 dport,
151 const int dif) 144 const int dif)
@@ -153,7 +146,7 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
153 struct sock *sk; 146 struct sock *sk;
154 147
155 local_bh_disable(); 148 local_bh_disable();
156 sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); 149 sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
157 local_bh_enable(); 150 local_bh_enable();
158 151
159 return sk; 152 return sk;
@@ -175,24 +168,20 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
175 const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, 168 const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
176 inet->dport); 169 inet->dport);
177 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 170 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
171 rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
178 struct sock *sk2; 172 struct sock *sk2;
179 const struct hlist_node *node; 173 const struct hlist_node *node;
180 struct inet_timewait_sock *tw; 174 struct inet_timewait_sock *tw;
175 struct net *net = sk->sk_net;
181 176
182 prefetch(head->chain.first); 177 prefetch(head->chain.first);
183 write_lock(&head->lock); 178 write_lock(lock);
184 179
185 /* Check TIME-WAIT sockets first. */ 180 /* Check TIME-WAIT sockets first. */
186 sk_for_each(sk2, node, &head->twchain) { 181 sk_for_each(sk2, node, &head->twchain) {
187 const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
188
189 tw = inet_twsk(sk2); 182 tw = inet_twsk(sk2);
190 183
191 if(*((__portpair *)&(tw->tw_dport)) == ports && 184 if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
192 sk2->sk_family == PF_INET6 &&
193 ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
194 ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
195 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
196 if (twsk_unique(sk, sk2, twp)) 185 if (twsk_unique(sk, sk2, twp))
197 goto unique; 186 goto unique;
198 else 187 else
@@ -203,7 +192,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
203 192
204 /* And established part... */ 193 /* And established part... */
205 sk_for_each(sk2, node, &head->chain) { 194 sk_for_each(sk2, node, &head->chain) {
206 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) 195 if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
207 goto not_unique; 196 goto not_unique;
208 } 197 }
209 198
@@ -215,8 +204,8 @@ unique:
215 BUG_TRAP(sk_unhashed(sk)); 204 BUG_TRAP(sk_unhashed(sk));
216 __sk_add_node(sk, &head->chain); 205 __sk_add_node(sk, &head->chain);
217 sk->sk_hash = hash; 206 sk->sk_hash = hash;
218 sock_prot_inc_use(sk->sk_prot); 207 sock_prot_inuse_add(sk->sk_prot, 1);
219 write_unlock(&head->lock); 208 write_unlock(lock);
220 209
221 if (twp != NULL) { 210 if (twp != NULL) {
222 *twp = tw; 211 *twp = tw;
@@ -231,7 +220,7 @@ unique:
231 return 0; 220 return 0;
232 221
233not_unique: 222not_unique:
234 write_unlock(&head->lock); 223 write_unlock(lock);
235 return -EADDRNOTAVAIL; 224 return -EADDRNOTAVAIL;
236} 225}
237 226
@@ -247,97 +236,8 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
247int inet6_hash_connect(struct inet_timewait_death_row *death_row, 236int inet6_hash_connect(struct inet_timewait_death_row *death_row,
248 struct sock *sk) 237 struct sock *sk)
249{ 238{
250 struct inet_hashinfo *hinfo = death_row->hashinfo; 239 return __inet_hash_connect(death_row, sk,
251 const unsigned short snum = inet_sk(sk)->num; 240 __inet6_check_established, __inet6_hash);
252 struct inet_bind_hashbucket *head;
253 struct inet_bind_bucket *tb;
254 int ret;
255
256 if (snum == 0) {
257 int i, port, low, high, remaining;
258 static u32 hint;
259 const u32 offset = hint + inet6_sk_port_offset(sk);
260 struct hlist_node *node;
261 struct inet_timewait_sock *tw = NULL;
262
263 inet_get_local_port_range(&low, &high);
264 remaining = (high - low) + 1;
265
266 local_bh_disable();
267 for (i = 1; i <= remaining; i++) {
268 port = low + (i + offset) % remaining;
269 head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
270 spin_lock(&head->lock);
271
272 /* Does not bother with rcv_saddr checks,
273 * because the established check is already
274 * unique enough.
275 */
276 inet_bind_bucket_for_each(tb, node, &head->chain) {
277 if (tb->port == port) {
278 BUG_TRAP(!hlist_empty(&tb->owners));
279 if (tb->fastreuse >= 0)
280 goto next_port;
281 if (!__inet6_check_established(death_row,
282 sk, port,
283 &tw))
284 goto ok;
285 goto next_port;
286 }
287 }
288
289 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
290 head, port);
291 if (!tb) {
292 spin_unlock(&head->lock);
293 break;
294 }
295 tb->fastreuse = -1;
296 goto ok;
297
298 next_port:
299 spin_unlock(&head->lock);
300 }
301 local_bh_enable();
302
303 return -EADDRNOTAVAIL;
304
305ok:
306 hint += i;
307
308 /* Head lock still held and bh's disabled */
309 inet_bind_hash(sk, tb, port);
310 if (sk_unhashed(sk)) {
311 inet_sk(sk)->sport = htons(port);
312 __inet6_hash(hinfo, sk);
313 }
314 spin_unlock(&head->lock);
315
316 if (tw) {
317 inet_twsk_deschedule(tw, death_row);
318 inet_twsk_put(tw);
319 }
320
321 ret = 0;
322 goto out;
323 }
324
325 head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
326 tb = inet_csk(sk)->icsk_bind_hash;
327 spin_lock_bh(&head->lock);
328
329 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
330 __inet6_hash(hinfo, sk);
331 spin_unlock_bh(&head->lock);
332 return 0;
333 } else {
334 spin_unlock(&head->lock);
335 /* No definite answer... Walk to established hash table */
336 ret = __inet6_check_established(death_row, sk, snum, NULL);
337out:
338 local_bh_enable();
339 return ret;
340 }
341} 241}
342 242
343EXPORT_SYMBOL_GPL(inet6_hash_connect); 243EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 946cf389ab95..f93407cf6515 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -361,6 +361,7 @@ end:
361 361
362static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 362static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
363{ 363{
364 struct net *net = skb->sk->sk_net;
364 unsigned int h, s_h; 365 unsigned int h, s_h;
365 unsigned int e = 0, s_e; 366 unsigned int e = 0, s_e;
366 struct rt6_rtnl_dump_arg arg; 367 struct rt6_rtnl_dump_arg arg;
@@ -369,6 +370,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
369 struct hlist_node *node; 370 struct hlist_node *node;
370 int res = 0; 371 int res = 0;
371 372
373 if (net != &init_net)
374 return 0;
375
372 s_h = cb->args[0]; 376 s_h = cb->args[0];
373 s_e = cb->args[1]; 377 s_e = cb->args[1];
374 378
@@ -677,13 +681,15 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt)
677{ 681{
678 if (ip6_fib_timer.expires == 0 && 682 if (ip6_fib_timer.expires == 0 &&
679 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) 683 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
680 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 684 mod_timer(&ip6_fib_timer, jiffies +
685 init_net.ipv6.sysctl.ip6_rt_gc_interval);
681} 686}
682 687
683void fib6_force_start_gc(void) 688void fib6_force_start_gc(void)
684{ 689{
685 if (ip6_fib_timer.expires == 0) 690 if (ip6_fib_timer.expires == 0)
686 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 691 mod_timer(&ip6_fib_timer, jiffies +
692 init_net.ipv6.sysctl.ip6_rt_gc_interval);
687} 693}
688 694
689/* 695/*
@@ -1122,9 +1128,6 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1122 1128
1123 rt->u.dst.rt6_next = NULL; 1129 rt->u.dst.rt6_next = NULL;
1124 1130
1125 if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
1126 fn->leaf = &ip6_null_entry;
1127
1128 /* If it was last route, expunge its radix tree node */ 1131 /* If it was last route, expunge its radix tree node */
1129 if (fn->leaf == NULL) { 1132 if (fn->leaf == NULL) {
1130 fn->fn_flags &= ~RTN_RTINFO; 1133 fn->fn_flags &= ~RTN_RTINFO;
@@ -1311,6 +1314,9 @@ static int fib6_walk(struct fib6_walker_t *w)
1311 1314
1312static int fib6_clean_node(struct fib6_walker_t *w) 1315static int fib6_clean_node(struct fib6_walker_t *w)
1313{ 1316{
1317 struct nl_info info = {
1318 .nl_net = &init_net,
1319 };
1314 int res; 1320 int res;
1315 struct rt6_info *rt; 1321 struct rt6_info *rt;
1316 struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); 1322 struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
@@ -1319,7 +1325,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
1319 res = c->func(rt, c->arg); 1325 res = c->func(rt, c->arg);
1320 if (res < 0) { 1326 if (res < 0) {
1321 w->leaf = rt; 1327 w->leaf = rt;
1322 res = fib6_del(rt, NULL); 1328 res = fib6_del(rt, &info);
1323 if (res) { 1329 if (res) {
1324#if RT6_DEBUG >= 2 1330#if RT6_DEBUG >= 2
1325 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); 1331 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1445,7 +1451,8 @@ void fib6_run_gc(unsigned long dummy)
1445{ 1451{
1446 if (dummy != ~0UL) { 1452 if (dummy != ~0UL) {
1447 spin_lock_bh(&fib6_gc_lock); 1453 spin_lock_bh(&fib6_gc_lock);
1448 gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval; 1454 gc_args.timeout = dummy ? (int)dummy :
1455 init_net.ipv6.sysctl.ip6_rt_gc_interval;
1449 } else { 1456 } else {
1450 local_bh_disable(); 1457 local_bh_disable();
1451 if (!spin_trylock(&fib6_gc_lock)) { 1458 if (!spin_trylock(&fib6_gc_lock)) {
@@ -1453,7 +1460,7 @@ void fib6_run_gc(unsigned long dummy)
1453 local_bh_enable(); 1460 local_bh_enable();
1454 return; 1461 return;
1455 } 1462 }
1456 gc_args.timeout = ip6_rt_gc_interval; 1463 gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval;
1457 } 1464 }
1458 gc_args.more = 0; 1465 gc_args.more = 0;
1459 1466
@@ -1461,7 +1468,8 @@ void fib6_run_gc(unsigned long dummy)
1461 fib6_clean_all(fib6_age, 0, NULL); 1468 fib6_clean_all(fib6_age, 0, NULL);
1462 1469
1463 if (gc_args.more) 1470 if (gc_args.more)
1464 mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); 1471 mod_timer(&ip6_fib_timer, jiffies +
1472 init_net.ipv6.sysctl.ip6_rt_gc_interval);
1465 else { 1473 else {
1466 del_timer(&ip6_fib_timer); 1474 del_timer(&ip6_fib_timer);
1467 ip6_fib_timer.expires = 0; 1475 ip6_fib_timer.expires = 0;
@@ -1469,16 +1477,27 @@ void fib6_run_gc(unsigned long dummy)
1469 spin_unlock_bh(&fib6_gc_lock); 1477 spin_unlock_bh(&fib6_gc_lock);
1470} 1478}
1471 1479
1472void __init fib6_init(void) 1480int __init fib6_init(void)
1473{ 1481{
1482 int ret;
1474 fib6_node_kmem = kmem_cache_create("fib6_nodes", 1483 fib6_node_kmem = kmem_cache_create("fib6_nodes",
1475 sizeof(struct fib6_node), 1484 sizeof(struct fib6_node),
1476 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 1485 0, SLAB_HWCACHE_ALIGN,
1477 NULL); 1486 NULL);
1487 if (!fib6_node_kmem)
1488 return -ENOMEM;
1478 1489
1479 fib6_tables_init(); 1490 fib6_tables_init();
1480 1491
1481 __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); 1492 ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
1493 if (ret)
1494 goto out_kmem_cache_create;
1495out:
1496 return ret;
1497
1498out_kmem_cache_create:
1499 kmem_cache_destroy(fib6_node_kmem);
1500 goto out;
1482} 1501}
1483 1502
1484void fib6_gc_cleanup(void) 1503void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b12cc22e7745..2b7d9ee98832 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -629,6 +629,7 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
629} 629}
630 630
631static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) 631static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
632 __acquires(ip6_fl_lock)
632{ 633{
633 read_lock_bh(&ip6_fl_lock); 634 read_lock_bh(&ip6_fl_lock);
634 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 635 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -647,6 +648,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
647} 648}
648 649
649static void ip6fl_seq_stop(struct seq_file *seq, void *v) 650static void ip6fl_seq_stop(struct seq_file *seq, void *v)
651 __releases(ip6_fl_lock)
650{ 652{
651 read_unlock_bh(&ip6_fl_lock); 653 read_unlock_bh(&ip6_fl_lock);
652} 654}
@@ -692,20 +694,36 @@ static const struct file_operations ip6fl_seq_fops = {
692 .llseek = seq_lseek, 694 .llseek = seq_lseek,
693 .release = seq_release_private, 695 .release = seq_release_private,
694}; 696};
695#endif
696 697
698static int ip6_flowlabel_proc_init(struct net *net)
699{
700 if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops))
701 return -ENOMEM;
702 return 0;
703}
697 704
698void ip6_flowlabel_init(void) 705static void ip6_flowlabel_proc_fini(struct net *net)
699{ 706{
700#ifdef CONFIG_PROC_FS 707 proc_net_remove(net, "ip6_flowlabel");
701 proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); 708}
709#else
710static inline int ip6_flowlabel_proc_init(struct net *net)
711{
712 return 0;
713}
714static inline void ip6_flowlabel_proc_fini(struct net *net)
715{
716 return ;
717}
702#endif 718#endif
719
720int ip6_flowlabel_init(void)
721{
722 return ip6_flowlabel_proc_init(&init_net);
703} 723}
704 724
705void ip6_flowlabel_cleanup(void) 725void ip6_flowlabel_cleanup(void)
706{ 726{
707 del_timer(&ip6_fl_gc_timer); 727 del_timer(&ip6_fl_gc_timer);
708#ifdef CONFIG_PROC_FS 728 ip6_flowlabel_proc_fini(&init_net);
709 proc_net_remove(&init_net, "ip6_flowlabel");
710#endif
711} 729}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fac6f7f9dd73..178aebc0427a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -134,7 +134,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
134 134
135 rcu_read_unlock(); 135 rcu_read_unlock();
136 136
137 return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); 137 return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL,
138 ip6_rcv_finish);
138err: 139err:
139 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); 140 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
140drop: 141drop:
@@ -152,9 +153,8 @@ out:
152static int ip6_input_finish(struct sk_buff *skb) 153static int ip6_input_finish(struct sk_buff *skb)
153{ 154{
154 struct inet6_protocol *ipprot; 155 struct inet6_protocol *ipprot;
155 struct sock *raw_sk;
156 unsigned int nhoff; 156 unsigned int nhoff;
157 int nexthdr; 157 int nexthdr, raw;
158 u8 hash; 158 u8 hash;
159 struct inet6_dev *idev; 159 struct inet6_dev *idev;
160 160
@@ -170,9 +170,7 @@ resubmit:
170 nhoff = IP6CB(skb)->nhoff; 170 nhoff = IP6CB(skb)->nhoff;
171 nexthdr = skb_network_header(skb)[nhoff]; 171 nexthdr = skb_network_header(skb)[nhoff];
172 172
173 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); 173 raw = raw6_local_deliver(skb, nexthdr);
174 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
175 raw_sk = NULL;
176 174
177 hash = nexthdr & (MAX_INET_PROTOS - 1); 175 hash = nexthdr & (MAX_INET_PROTOS - 1);
178 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { 176 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
@@ -205,7 +203,7 @@ resubmit:
205 else if (ret == 0) 203 else if (ret == 0)
206 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); 204 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
207 } else { 205 } else {
208 if (!raw_sk) { 206 if (!raw) {
209 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 207 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
210 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); 208 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
211 icmpv6_send(skb, ICMPV6_PARAMPROB, 209 icmpv6_send(skb, ICMPV6_PARAMPROB,
@@ -229,7 +227,8 @@ discard:
229 227
230int ip6_input(struct sk_buff *skb) 228int ip6_input(struct sk_buff *skb)
231{ 229{
232 return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); 230 return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
231 ip6_input_finish);
233} 232}
234 233
235int ip6_mc_input(struct sk_buff *skb) 234int ip6_mc_input(struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 653fc0a8235b..9ac6ca2521c3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -29,7 +29,7 @@
29 */ 29 */
30 30
31#include <linux/errno.h> 31#include <linux/errno.h>
32#include <linux/types.h> 32#include <linux/kernel.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/socket.h> 34#include <linux/socket.h>
35#include <linux/net.h> 35#include <linux/net.h>
@@ -70,6 +70,31 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
70 spin_unlock_bh(&ip6_id_lock); 70 spin_unlock_bh(&ip6_id_lock);
71} 71}
72 72
73int __ip6_local_out(struct sk_buff *skb)
74{
75 int len;
76
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
79 len = 0;
80 ipv6_hdr(skb)->payload_len = htons(len);
81
82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
83 dst_output);
84}
85
86int ip6_local_out(struct sk_buff *skb)
87{
88 int err;
89
90 err = __ip6_local_out(skb);
91 if (likely(err == 1))
92 err = dst_output(skb);
93
94 return err;
95}
96EXPORT_SYMBOL_GPL(ip6_local_out);
97
73static int ip6_output_finish(struct sk_buff *skb) 98static int ip6_output_finish(struct sk_buff *skb)
74{ 99{
75 struct dst_entry *dst = skb->dst; 100 struct dst_entry *dst = skb->dst;
@@ -120,8 +145,8 @@ static int ip6_output2(struct sk_buff *skb)
120 is not supported in any case. 145 is not supported in any case.
121 */ 146 */
122 if (newskb) 147 if (newskb)
123 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, 148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
124 newskb->dev, 149 NULL, newskb->dev,
125 ip6_dev_loopback_xmit); 150 ip6_dev_loopback_xmit);
126 151
127 if (ipv6_hdr(skb)->hop_limit == 0) { 152 if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -134,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb)
134 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); 159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
135 } 160 }
136 161
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); 162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
138} 164}
139 165
140static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -231,12 +257,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
231 ipv6_addr_copy(&hdr->daddr, first_hop); 257 ipv6_addr_copy(&hdr->daddr, first_hop);
232 258
233 skb->priority = sk->sk_priority; 259 skb->priority = sk->sk_priority;
260 skb->mark = sk->sk_mark;
234 261
235 mtu = dst_mtu(dst); 262 mtu = dst_mtu(dst);
236 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { 263 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
237 IP6_INC_STATS(ip6_dst_idev(skb->dst), 264 IP6_INC_STATS(ip6_dst_idev(skb->dst),
238 IPSTATS_MIB_OUTREQUESTS); 265 IPSTATS_MIB_OUTREQUESTS);
239 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, 266 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
240 dst_output); 267 dst_output);
241 } 268 }
242 269
@@ -423,7 +450,7 @@ int ip6_forward(struct sk_buff *skb)
423 450
424 /* XXX: idev->cnf.proxy_ndp? */ 451 /* XXX: idev->cnf.proxy_ndp? */
425 if (ipv6_devconf.proxy_ndp && 452 if (ipv6_devconf.proxy_ndp &&
426 pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { 453 pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
427 int proxied = ip6_forward_proxy_check(skb); 454 int proxied = ip6_forward_proxy_check(skb);
428 if (proxied > 0) 455 if (proxied > 0)
429 return ip6_input(skb); 456 return ip6_input(skb);
@@ -500,7 +527,8 @@ int ip6_forward(struct sk_buff *skb)
500 hdr->hop_limit--; 527 hdr->hop_limit--;
501 528
502 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 529 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
503 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); 530 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
531 ip6_forward_finish);
504 532
505error: 533error:
506 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 534 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
@@ -609,6 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
609 637
610 if (skb_shinfo(skb)->frag_list) { 638 if (skb_shinfo(skb)->frag_list) {
611 int first_len = skb_pagelen(skb); 639 int first_len = skb_pagelen(skb);
640 int truesizes = 0;
612 641
613 if (first_len - hlen > mtu || 642 if (first_len - hlen > mtu ||
614 ((first_len - hlen) & 7) || 643 ((first_len - hlen) & 7) ||
@@ -631,7 +660,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
631 sock_hold(skb->sk); 660 sock_hold(skb->sk);
632 frag->sk = skb->sk; 661 frag->sk = skb->sk;
633 frag->destructor = sock_wfree; 662 frag->destructor = sock_wfree;
634 skb->truesize -= frag->truesize; 663 truesizes += frag->truesize;
635 } 664 }
636 } 665 }
637 666
@@ -662,6 +691,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
662 691
663 first_len = skb_pagelen(skb); 692 first_len = skb_pagelen(skb);
664 skb->data_len = first_len - skb_headlen(skb); 693 skb->data_len = first_len - skb_headlen(skb);
694 skb->truesize -= truesizes;
665 skb->len = first_len; 695 skb->len = first_len;
666 ipv6_hdr(skb)->payload_len = htons(first_len - 696 ipv6_hdr(skb)->payload_len = htons(first_len -
667 sizeof(struct ipv6hdr)); 697 sizeof(struct ipv6hdr));
@@ -909,7 +939,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
909 struct flowi fl_gw; 939 struct flowi fl_gw;
910 int redirect; 940 int redirect;
911 941
912 ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1); 942 ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
943 (*dst)->dev, 1);
913 944
914 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 945 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915 if (ifp) 946 if (ifp)
@@ -933,6 +964,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
933 return 0; 964 return 0;
934 965
935out_err_release: 966out_err_release:
967 if (err == -ENETUNREACH)
968 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
936 dst_release(*dst); 969 dst_release(*dst);
937 *dst = NULL; 970 *dst = NULL;
938 return err; 971 return err;
@@ -1096,7 +1129,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1096 inet->cork.length = 0; 1129 inet->cork.length = 0;
1097 sk->sk_sndmsg_page = NULL; 1130 sk->sk_sndmsg_page = NULL;
1098 sk->sk_sndmsg_off = 0; 1131 sk->sk_sndmsg_off = 0;
1099 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); 1132 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1133 rt->rt6i_nfheader_len;
1100 length += exthdrlen; 1134 length += exthdrlen;
1101 transhdrlen += exthdrlen; 1135 transhdrlen += exthdrlen;
1102 } else { 1136 } else {
@@ -1111,7 +1145,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1111 1145
1112 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1146 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1113 1147
1114 fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); 1148 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1149 (opt ? opt->opt_nflen : 0);
1115 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1150 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1116 1151
1117 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1152 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
@@ -1314,8 +1349,6 @@ alloc_new_skb:
1314 1349
1315 skb_fill_page_desc(skb, i, page, 0, 0); 1350 skb_fill_page_desc(skb, i, page, 0, 0);
1316 frag = &skb_shinfo(skb)->frags[i]; 1351 frag = &skb_shinfo(skb)->frags[i];
1317 skb->truesize += PAGE_SIZE;
1318 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1319 } else { 1352 } else {
1320 err = -EMSGSIZE; 1353 err = -EMSGSIZE;
1321 goto error; 1354 goto error;
@@ -1328,6 +1361,8 @@ alloc_new_skb:
1328 frag->size += copy; 1361 frag->size += copy;
1329 skb->len += copy; 1362 skb->len += copy;
1330 skb->data_len += copy; 1363 skb->data_len += copy;
1364 skb->truesize += copy;
1365 atomic_add(copy, &sk->sk_wmem_alloc);
1331 } 1366 }
1332 offset += copy; 1367 offset += copy;
1333 length -= copy; 1368 length -= copy;
@@ -1339,6 +1374,19 @@ error:
1339 return err; 1374 return err;
1340} 1375}
1341 1376
1377static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1378{
1379 inet->cork.flags &= ~IPCORK_OPT;
1380 kfree(np->cork.opt);
1381 np->cork.opt = NULL;
1382 if (np->cork.rt) {
1383 dst_release(&np->cork.rt->u.dst);
1384 np->cork.rt = NULL;
1385 inet->cork.flags &= ~IPCORK_ALLFRAG;
1386 }
1387 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1388}
1389
1342int ip6_push_pending_frames(struct sock *sk) 1390int ip6_push_pending_frames(struct sock *sk)
1343{ 1391{
1344 struct sk_buff *skb, *tmp_skb; 1392 struct sk_buff *skb, *tmp_skb;
@@ -1386,16 +1434,13 @@ int ip6_push_pending_frames(struct sock *sk)
1386 *(__be32*)hdr = fl->fl6_flowlabel | 1434 *(__be32*)hdr = fl->fl6_flowlabel |
1387 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1435 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1388 1436
1389 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1390 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1391 else
1392 hdr->payload_len = 0;
1393 hdr->hop_limit = np->cork.hop_limit; 1437 hdr->hop_limit = np->cork.hop_limit;
1394 hdr->nexthdr = proto; 1438 hdr->nexthdr = proto;
1395 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1439 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1396 ipv6_addr_copy(&hdr->daddr, final_dst); 1440 ipv6_addr_copy(&hdr->daddr, final_dst);
1397 1441
1398 skb->priority = sk->sk_priority; 1442 skb->priority = sk->sk_priority;
1443 skb->mark = sk->sk_mark;
1399 1444
1400 skb->dst = dst_clone(&rt->u.dst); 1445 skb->dst = dst_clone(&rt->u.dst);
1401 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); 1446 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1406,7 +1451,7 @@ int ip6_push_pending_frames(struct sock *sk)
1406 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); 1451 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1407 } 1452 }
1408 1453
1409 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); 1454 err = ip6_local_out(skb);
1410 if (err) { 1455 if (err) {
1411 if (err > 0) 1456 if (err > 0)
1412 err = np->recverr ? net_xmit_errno(err) : 0; 1457 err = np->recverr ? net_xmit_errno(err) : 0;
@@ -1415,15 +1460,7 @@ int ip6_push_pending_frames(struct sock *sk)
1415 } 1460 }
1416 1461
1417out: 1462out:
1418 inet->cork.flags &= ~IPCORK_OPT; 1463 ip6_cork_release(inet, np);
1419 kfree(np->cork.opt);
1420 np->cork.opt = NULL;
1421 if (np->cork.rt) {
1422 dst_release(&np->cork.rt->u.dst);
1423 np->cork.rt = NULL;
1424 inet->cork.flags &= ~IPCORK_ALLFRAG;
1425 }
1426 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1427 return err; 1464 return err;
1428error: 1465error:
1429 goto out; 1466 goto out;
@@ -1431,8 +1468,6 @@ error:
1431 1468
1432void ip6_flush_pending_frames(struct sock *sk) 1469void ip6_flush_pending_frames(struct sock *sk)
1433{ 1470{
1434 struct inet_sock *inet = inet_sk(sk);
1435 struct ipv6_pinfo *np = inet6_sk(sk);
1436 struct sk_buff *skb; 1471 struct sk_buff *skb;
1437 1472
1438 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1473 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
@@ -1442,14 +1477,5 @@ void ip6_flush_pending_frames(struct sock *sk)
1442 kfree_skb(skb); 1477 kfree_skb(skb);
1443 } 1478 }
1444 1479
1445 inet->cork.flags &= ~IPCORK_OPT; 1480 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1446
1447 kfree(np->cork.opt);
1448 np->cork.opt = NULL;
1449 if (np->cork.rt) {
1450 dst_release(&np->cork.rt->u.dst);
1451 np->cork.rt = NULL;
1452 inet->cork.flags &= ~IPCORK_ALLFRAG;
1453 }
1454 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1455} 1481}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5383b33db8ca..9031e521c1df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -533,7 +533,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
533 fl.fl4_dst = eiph->saddr; 533 fl.fl4_dst = eiph->saddr;
534 fl.fl4_tos = RT_TOS(eiph->tos); 534 fl.fl4_tos = RT_TOS(eiph->tos);
535 fl.proto = IPPROTO_IPIP; 535 fl.proto = IPPROTO_IPIP;
536 if (ip_route_output_key(&rt, &fl)) 536 if (ip_route_output_key(&init_net, &rt, &fl))
537 goto out; 537 goto out;
538 538
539 skb2->dev = rt->u.dst.dev; 539 skb2->dev = rt->u.dst.dev;
@@ -545,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
545 fl.fl4_dst = eiph->daddr; 545 fl.fl4_dst = eiph->daddr;
546 fl.fl4_src = eiph->saddr; 546 fl.fl4_src = eiph->saddr;
547 fl.fl4_tos = eiph->tos; 547 fl.fl4_tos = eiph->tos;
548 if (ip_route_output_key(&rt, &fl) || 548 if (ip_route_output_key(&init_net, &rt, &fl) ||
549 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 549 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
550 ip_rt_put(rt); 550 ip_rt_put(rt);
551 goto out; 551 goto out;
@@ -635,7 +635,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
635 struct sk_buff *skb) 635 struct sk_buff *skb)
636{ 636{
637 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 637 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
638 ipv6_copy_dscp(ipv6h, ipv6_hdr(skb)); 638 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
639 639
640 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) 640 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
641 IP6_ECN_set_ce(ipv6_hdr(skb)); 641 IP6_ECN_set_ce(ipv6_hdr(skb));
@@ -653,8 +653,8 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
653 ldev = dev_get_by_index(&init_net, p->link); 653 ldev = dev_get_by_index(&init_net, p->link);
654 654
655 if ((ipv6_addr_is_multicast(&p->laddr) || 655 if ((ipv6_addr_is_multicast(&p->laddr) ||
656 likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && 656 likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) &&
657 likely(!ipv6_chk_addr(&p->raddr, NULL, 0))) 657 likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
658 ret = 1; 658 ret = 1;
659 659
660 if (ldev) 660 if (ldev)
@@ -788,12 +788,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
788 if (p->link) 788 if (p->link)
789 ldev = dev_get_by_index(&init_net, p->link); 789 ldev = dev_get_by_index(&init_net, p->link);
790 790
791 if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) 791 if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0)))
792 printk(KERN_WARNING 792 printk(KERN_WARNING
793 "%s xmit: Local address not yet configured!\n", 793 "%s xmit: Local address not yet configured!\n",
794 p->name); 794 p->name);
795 else if (!ipv6_addr_is_multicast(&p->raddr) && 795 else if (!ipv6_addr_is_multicast(&p->raddr) &&
796 unlikely(ipv6_chk_addr(&p->raddr, NULL, 0))) 796 unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
797 printk(KERN_WARNING 797 printk(KERN_WARNING
798 "%s xmit: Routing loop! " 798 "%s xmit: Routing loop! "
799 "Remote address found on this node!\n", 799 "Remote address found on this node!\n",
@@ -910,15 +910,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
910 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); 910 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
911 dsfield = INET_ECN_encapsulate(0, dsfield); 911 dsfield = INET_ECN_encapsulate(0, dsfield);
912 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 912 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
913 ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
914 ipv6h->hop_limit = t->parms.hop_limit; 913 ipv6h->hop_limit = t->parms.hop_limit;
915 ipv6h->nexthdr = proto; 914 ipv6h->nexthdr = proto;
916 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); 915 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
917 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); 916 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
918 nf_reset(skb); 917 nf_reset(skb);
919 pkt_len = skb->len; 918 pkt_len = skb->len;
920 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 919 err = ip6_local_out(skb);
921 skb->dst->dev, dst_output);
922 920
923 if (net_xmit_eval(err) == 0) { 921 if (net_xmit_eval(err) == 0) {
924 stats->tx_bytes += pkt_len; 922 stats->tx_bytes += pkt_len;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 80ef2a1d39fd..b90039593a7f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -34,9 +34,9 @@
34#include <net/ip.h> 34#include <net/ip.h>
35#include <net/xfrm.h> 35#include <net/xfrm.h>
36#include <net/ipcomp.h> 36#include <net/ipcomp.h>
37#include <asm/scatterlist.h>
38#include <asm/semaphore.h> 37#include <asm/semaphore.h>
39#include <linux/crypto.h> 38#include <linux/crypto.h>
39#include <linux/err.h>
40#include <linux/pfkeyv2.h> 40#include <linux/pfkeyv2.h>
41#include <linux/random.h> 41#include <linux/random.h>
42#include <linux/percpu.h> 42#include <linux/percpu.h>
@@ -64,6 +64,7 @@ static LIST_HEAD(ipcomp6_tfms_list);
64 64
65static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) 65static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
66{ 66{
67 int nexthdr;
67 int err = -ENOMEM; 68 int err = -ENOMEM;
68 struct ip_comp_hdr *ipch; 69 struct ip_comp_hdr *ipch;
69 int plen, dlen; 70 int plen, dlen;
@@ -79,6 +80,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
79 80
80 /* Remove ipcomp header and decompress original payload */ 81 /* Remove ipcomp header and decompress original payload */
81 ipch = (void *)skb->data; 82 ipch = (void *)skb->data;
83 nexthdr = ipch->nexthdr;
84
82 skb->transport_header = skb->network_header + sizeof(*ipch); 85 skb->transport_header = skb->network_header + sizeof(*ipch);
83 __skb_pull(skb, sizeof(*ipch)); 86 __skb_pull(skb, sizeof(*ipch));
84 87
@@ -108,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
108 skb->truesize += dlen - plen; 111 skb->truesize += dlen - plen;
109 __skb_put(skb, dlen - plen); 112 __skb_put(skb, dlen - plen);
110 skb_copy_to_linear_data(skb, scratch, dlen); 113 skb_copy_to_linear_data(skb, scratch, dlen);
111 err = ipch->nexthdr; 114 err = nexthdr;
112 115
113out_put_cpu: 116out_put_cpu:
114 put_cpu(); 117 put_cpu();
@@ -190,7 +193,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
190static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) 193static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
191{ 194{
192 struct xfrm_state *t = NULL; 195 struct xfrm_state *t = NULL;
193 u8 mode = XFRM_MODE_TUNNEL;
194 196
195 t = xfrm_state_alloc(); 197 t = xfrm_state_alloc();
196 if (!t) 198 if (!t)
@@ -204,9 +206,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
204 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); 206 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
205 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 207 memcpy(&t->sel, &x->sel, sizeof(t->sel));
206 t->props.family = AF_INET6; 208 t->props.family = AF_INET6;
207 if (x->props.mode == XFRM_MODE_BEET) 209 t->props.mode = x->props.mode;
208 mode = x->props.mode;
209 t->props.mode = mode;
210 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); 210 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
211 211
212 if (xfrm_init_state(t)) 212 if (xfrm_init_state(t))
@@ -359,7 +359,7 @@ static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
359 for_each_possible_cpu(cpu) { 359 for_each_possible_cpu(cpu) {
360 struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, 360 struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
361 CRYPTO_ALG_ASYNC); 361 CRYPTO_ALG_ASYNC);
362 if (!tfm) 362 if (IS_ERR(tfm))
363 goto error; 363 goto error;
364 *per_cpu_ptr(tfms, cpu) = tfm; 364 *per_cpu_ptr(tfms, cpu) = tfm;
365 } 365 }
@@ -405,22 +405,22 @@ static int ipcomp6_init_state(struct xfrm_state *x)
405 if (x->encap) 405 if (x->encap)
406 goto out; 406 goto out;
407 407
408 err = -ENOMEM;
409 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
410 if (!ipcd)
411 goto out;
412
413 x->props.header_len = 0; 408 x->props.header_len = 0;
414 switch (x->props.mode) { 409 switch (x->props.mode) {
415 case XFRM_MODE_BEET:
416 case XFRM_MODE_TRANSPORT: 410 case XFRM_MODE_TRANSPORT:
417 break; 411 break;
418 case XFRM_MODE_TUNNEL: 412 case XFRM_MODE_TUNNEL:
419 x->props.header_len += sizeof(struct ipv6hdr); 413 x->props.header_len += sizeof(struct ipv6hdr);
414 break;
420 default: 415 default:
421 goto error; 416 goto out;
422 } 417 }
423 418
419 err = -ENOMEM;
420 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
421 if (!ipcd)
422 goto out;
423
424 mutex_lock(&ipcomp6_resource_mutex); 424 mutex_lock(&ipcomp6_resource_mutex);
425 if (!ipcomp6_alloc_scratches()) 425 if (!ipcomp6_alloc_scratches())
426 goto error; 426 goto error;
@@ -453,7 +453,7 @@ error:
453 goto out; 453 goto out;
454} 454}
455 455
456static struct xfrm_type ipcomp6_type = 456static const struct xfrm_type ipcomp6_type =
457{ 457{
458 .description = "IPCOMP6", 458 .description = "IPCOMP6",
459 .owner = THIS_MODULE, 459 .owner = THIS_MODULE,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1334fc174bcf..bf2a686aa13d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -268,8 +268,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
268 struct inet_connection_sock *icsk = inet_csk(sk); 268 struct inet_connection_sock *icsk = inet_csk(sk);
269 269
270 local_bh_disable(); 270 local_bh_disable();
271 sock_prot_dec_use(sk->sk_prot); 271 sock_prot_inuse_add(sk->sk_prot, -1);
272 sock_prot_inc_use(&tcp_prot); 272 sock_prot_inuse_add(&tcp_prot, 1);
273 local_bh_enable(); 273 local_bh_enable();
274 sk->sk_prot = &tcp_prot; 274 sk->sk_prot = &tcp_prot;
275 icsk->icsk_af_ops = &ipv4_specific; 275 icsk->icsk_af_ops = &ipv4_specific;
@@ -282,8 +282,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
282 if (sk->sk_protocol == IPPROTO_UDPLITE) 282 if (sk->sk_protocol == IPPROTO_UDPLITE)
283 prot = &udplite_prot; 283 prot = &udplite_prot;
284 local_bh_disable(); 284 local_bh_disable();
285 sock_prot_dec_use(sk->sk_prot); 285 sock_prot_inuse_add(sk->sk_prot, -1);
286 sock_prot_inc_use(prot); 286 sock_prot_inuse_add(prot, 1);
287 local_bh_enable(); 287 local_bh_enable();
288 sk->sk_prot = prot; 288 sk->sk_prot = prot;
289 sk->sk_socket->ops = &inet_dgram_ops; 289 sk->sk_socket->ops = &inet_dgram_ops;
@@ -1046,7 +1046,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1046 break; 1046 break;
1047 1047
1048 default: 1048 default:
1049 return -EINVAL; 1049 return -ENOPROTOOPT;
1050 } 1050 }
1051 len = min_t(unsigned int, sizeof(int), len); 1051 len = min_t(unsigned int, sizeof(int), len);
1052 if(put_user(len, optlen)) 1052 if(put_user(len, optlen))
@@ -1069,9 +1069,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
1069 1069
1070 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); 1070 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
1071#ifdef CONFIG_NETFILTER 1071#ifdef CONFIG_NETFILTER
1072 /* we need to exclude all possible EINVALs except default case */ 1072 /* we need to exclude all possible ENOPROTOOPTs except default case */
1073 if (err == -EINVAL && optname != IPV6_ADDRFORM && 1073 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
1074 optname != MCAST_MSFILTER) {
1075 int len; 1074 int len;
1076 1075
1077 if (get_user(len, optlen)) 1076 if (get_user(len, optlen))
@@ -1108,9 +1107,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
1108 1107
1109 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); 1108 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
1110#ifdef CONFIG_NETFILTER 1109#ifdef CONFIG_NETFILTER
1111 /* we need to exclude all possible EINVALs except default case */ 1110 /* we need to exclude all possible ENOPROTOOPTs except default case */
1112 if (err == -EINVAL && optname != IPV6_ADDRFORM && 1111 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
1113 optname != MCAST_MSFILTER) {
1114 int len; 1112 int len;
1115 1113
1116 if (get_user(len, optlen)) 1114 if (get_user(len, optlen))
@@ -1130,9 +1128,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
1130EXPORT_SYMBOL(compat_ipv6_getsockopt); 1128EXPORT_SYMBOL(compat_ipv6_getsockopt);
1131#endif 1129#endif
1132 1130
1133void __init ipv6_packet_init(void) 1131int __init ipv6_packet_init(void)
1134{ 1132{
1135 dev_add_pack(&ipv6_packet_type); 1133 dev_add_pack(&ipv6_packet_type);
1134 return 0;
1136} 1135}
1137 1136
1138void ipv6_packet_cleanup(void) 1137void ipv6_packet_cleanup(void)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 331d728c2035..ab228d1ea114 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -903,9 +903,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
903 return -ENOMEM; 903 return -ENOMEM;
904 } 904 }
905 905
906 init_timer(&mc->mca_timer); 906 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
907 mc->mca_timer.function = igmp6_timer_handler;
908 mc->mca_timer.data = (unsigned long) mc;
909 907
910 ipv6_addr_copy(&mc->mca_addr, addr); 908 ipv6_addr_copy(&mc->mca_addr, addr);
911 mc->idev = idev; 909 mc->idev = idev;
@@ -1450,7 +1448,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
1450 1448
1451static inline int mld_dev_queue_xmit(struct sk_buff *skb) 1449static inline int mld_dev_queue_xmit(struct sk_buff *skb)
1452{ 1450{
1453 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, 1451 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
1454 mld_dev_queue_xmit2); 1452 mld_dev_queue_xmit2);
1455} 1453}
1456 1454
@@ -1471,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb)
1471 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1469 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1472 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), 1470 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
1473 mldlen, 0)); 1471 mldlen, 0));
1474 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1472 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1475 mld_dev_queue_xmit); 1473 mld_dev_queue_xmit);
1476 if (!err) { 1474 if (!err) {
1477 ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); 1475 ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
@@ -1815,7 +1813,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1815 1813
1816 idev = in6_dev_get(skb->dev); 1814 idev = in6_dev_get(skb->dev);
1817 1815
1818 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1816 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1819 mld_dev_queue_xmit); 1817 mld_dev_queue_xmit);
1820 if (!err) { 1818 if (!err) {
1821 ICMP6MSGOUT_INC_STATS(idev, type); 1819 ICMP6MSGOUT_INC_STATS(idev, type);
@@ -2259,14 +2257,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2259 write_lock_bh(&idev->lock); 2257 write_lock_bh(&idev->lock);
2260 rwlock_init(&idev->mc_lock); 2258 rwlock_init(&idev->mc_lock);
2261 idev->mc_gq_running = 0; 2259 idev->mc_gq_running = 0;
2262 init_timer(&idev->mc_gq_timer); 2260 setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire,
2263 idev->mc_gq_timer.data = (unsigned long) idev; 2261 (unsigned long)idev);
2264 idev->mc_gq_timer.function = &mld_gq_timer_expire;
2265 idev->mc_tomb = NULL; 2262 idev->mc_tomb = NULL;
2266 idev->mc_ifc_count = 0; 2263 idev->mc_ifc_count = 0;
2267 init_timer(&idev->mc_ifc_timer); 2264 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
2268 idev->mc_ifc_timer.data = (unsigned long) idev; 2265 (unsigned long)idev);
2269 idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
2270 idev->mc_qrv = MLD_QRV_DEFAULT; 2266 idev->mc_qrv = MLD_QRV_DEFAULT;
2271 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; 2267 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
2272 idev->mc_v1_seen = 0; 2268 idev->mc_v1_seen = 0;
@@ -2377,6 +2373,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
2377} 2373}
2378 2374
2379static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) 2375static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
2376 __acquires(dev_base_lock)
2380{ 2377{
2381 read_lock(&dev_base_lock); 2378 read_lock(&dev_base_lock);
2382 return igmp6_mc_get_idx(seq, *pos); 2379 return igmp6_mc_get_idx(seq, *pos);
@@ -2391,6 +2388,7 @@ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2391} 2388}
2392 2389
2393static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) 2390static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
2391 __releases(dev_base_lock)
2394{ 2392{
2395 struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); 2393 struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
2396 if (likely(state->idev != NULL)) { 2394 if (likely(state->idev != NULL)) {
@@ -2520,6 +2518,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
2520} 2518}
2521 2519
2522static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) 2520static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
2521 __acquires(dev_base_lock)
2523{ 2522{
2524 read_lock(&dev_base_lock); 2523 read_lock(&dev_base_lock);
2525 return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2524 return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2537,6 +2536,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2537} 2536}
2538 2537
2539static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) 2538static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
2539 __releases(dev_base_lock)
2540{ 2540{
2541 struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); 2541 struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
2542 if (likely(state->im != NULL)) { 2542 if (likely(state->im != NULL)) {
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7fd841d41019..cd8a5bda13cd 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -34,11 +34,6 @@
34#include <net/xfrm.h> 34#include <net/xfrm.h>
35#include <net/mip6.h> 35#include <net/mip6.h>
36 36
37static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
38{
39 return x->coaddr;
40}
41
42static inline unsigned int calc_padlen(unsigned int len, unsigned int n) 37static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
43{ 38{
44 return (n - len + 16) & 0x7; 39 return (n - len + 16) & 0x7;
@@ -133,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
133{ 128{
134 struct ipv6hdr *iph = ipv6_hdr(skb); 129 struct ipv6hdr *iph = ipv6_hdr(skb);
135 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; 130 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
131 int err = destopt->nexthdr;
136 132
133 spin_lock(&x->lock);
137 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && 134 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
138 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 135 !ipv6_addr_any((struct in6_addr *)x->coaddr))
139 return -ENOENT; 136 err = -ENOENT;
137 spin_unlock(&x->lock);
140 138
141 return destopt->nexthdr; 139 return err;
142} 140}
143 141
144/* Destination Option Header is inserted. 142/* Destination Option Header is inserted.
@@ -332,30 +330,32 @@ static void mip6_destopt_destroy(struct xfrm_state *x)
332{ 330{
333} 331}
334 332
335static struct xfrm_type mip6_destopt_type = 333static const struct xfrm_type mip6_destopt_type =
336{ 334{
337 .description = "MIP6DESTOPT", 335 .description = "MIP6DESTOPT",
338 .owner = THIS_MODULE, 336 .owner = THIS_MODULE,
339 .proto = IPPROTO_DSTOPTS, 337 .proto = IPPROTO_DSTOPTS,
340 .flags = XFRM_TYPE_NON_FRAGMENT, 338 .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
341 .init_state = mip6_destopt_init_state, 339 .init_state = mip6_destopt_init_state,
342 .destructor = mip6_destopt_destroy, 340 .destructor = mip6_destopt_destroy,
343 .input = mip6_destopt_input, 341 .input = mip6_destopt_input,
344 .output = mip6_destopt_output, 342 .output = mip6_destopt_output,
345 .reject = mip6_destopt_reject, 343 .reject = mip6_destopt_reject,
346 .hdr_offset = mip6_destopt_offset, 344 .hdr_offset = mip6_destopt_offset,
347 .local_addr = mip6_xfrm_addr,
348}; 345};
349 346
350static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) 347static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
351{ 348{
352 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; 349 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
350 int err = rt2->rt_hdr.nexthdr;
353 351
352 spin_lock(&x->lock);
354 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && 353 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
355 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 354 !ipv6_addr_any((struct in6_addr *)x->coaddr))
356 return -ENOENT; 355 err = -ENOENT;
356 spin_unlock(&x->lock);
357 357
358 return rt2->rt_hdr.nexthdr; 358 return err;
359} 359}
360 360
361/* Routing Header type 2 is inserted. 361/* Routing Header type 2 is inserted.
@@ -462,18 +462,17 @@ static void mip6_rthdr_destroy(struct xfrm_state *x)
462{ 462{
463} 463}
464 464
465static struct xfrm_type mip6_rthdr_type = 465static const struct xfrm_type mip6_rthdr_type =
466{ 466{
467 .description = "MIP6RT", 467 .description = "MIP6RT",
468 .owner = THIS_MODULE, 468 .owner = THIS_MODULE,
469 .proto = IPPROTO_ROUTING, 469 .proto = IPPROTO_ROUTING,
470 .flags = XFRM_TYPE_NON_FRAGMENT, 470 .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
471 .init_state = mip6_rthdr_init_state, 471 .init_state = mip6_rthdr_init_state,
472 .destructor = mip6_rthdr_destroy, 472 .destructor = mip6_rthdr_destroy,
473 .input = mip6_rthdr_input, 473 .input = mip6_rthdr_input,
474 .output = mip6_rthdr_output, 474 .output = mip6_rthdr_output,
475 .hdr_offset = mip6_rthdr_offset, 475 .hdr_offset = mip6_rthdr_offset,
476 .remote_addr = mip6_xfrm_addr,
477}; 476};
478 477
479static int __init mip6_init(void) 478static int __init mip6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 20cfc90d5597..0d33a7d32125 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -337,7 +337,7 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
337 ipv6_arcnet_mc_map(addr, buf); 337 ipv6_arcnet_mc_map(addr, buf);
338 return 0; 338 return 0;
339 case ARPHRD_INFINIBAND: 339 case ARPHRD_INFINIBAND:
340 ipv6_ib_mc_map(addr, buf); 340 ipv6_ib_mc_map(addr, dev->broadcast, buf);
341 return 0; 341 return 0;
342 default: 342 default:
343 if (dir) { 343 if (dir) {
@@ -533,7 +533,8 @@ static void __ndisc_send(struct net_device *dev,
533 idev = in6_dev_get(dst->dev); 533 idev = in6_dev_get(dst->dev);
534 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 534 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
535 535
536 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); 536 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
537 dst_output);
537 if (!err) { 538 if (!err) {
538 ICMP6MSGOUT_INC_STATS(idev, type); 539 ICMP6MSGOUT_INC_STATS(idev, type);
539 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 540 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
@@ -555,7 +556,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
555 }; 556 };
556 557
557 /* for anycast or proxy, solicited_addr != src_addr */ 558 /* for anycast or proxy, solicited_addr != src_addr */
558 ifp = ipv6_get_ifaddr(solicited_addr, dev, 1); 559 ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1);
559 if (ifp) { 560 if (ifp) {
560 src_addr = solicited_addr; 561 src_addr = solicited_addr;
561 if (ifp->flags & IFA_F_OPTIMISTIC) 562 if (ifp->flags & IFA_F_OPTIMISTIC)
@@ -612,10 +613,11 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
612 * optimistic addresses, but we may send the solicitation 613 * optimistic addresses, but we may send the solicitation
613 * if we don't include the sllao. So here we check 614 * if we don't include the sllao. So here we check
614 * if our address is optimistic, and if so, we 615 * if our address is optimistic, and if so, we
615 * supress the inclusion of the sllao. 616 * suppress the inclusion of the sllao.
616 */ 617 */
617 if (send_sllao) { 618 if (send_sllao) {
618 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1); 619 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr,
620 dev, 1);
619 if (ifp) { 621 if (ifp) {
620 if (ifp->flags & IFA_F_OPTIMISTIC) { 622 if (ifp->flags & IFA_F_OPTIMISTIC) {
621 send_sllao = 0; 623 send_sllao = 0;
@@ -652,7 +654,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
652 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; 654 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
653 int probes = atomic_read(&neigh->probes); 655 int probes = atomic_read(&neigh->probes);
654 656
655 if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1)) 657 if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1))
656 saddr = &ipv6_hdr(skb)->saddr; 658 saddr = &ipv6_hdr(skb)->saddr;
657 659
658 if ((probes -= neigh->parms->ucast_probes) < 0) { 660 if ((probes -= neigh->parms->ucast_probes) < 0) {
@@ -740,7 +742,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
740 742
741 inc = ipv6_addr_is_multicast(daddr); 743 inc = ipv6_addr_is_multicast(daddr);
742 744
743 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) { 745 if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) {
744 746
745 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { 747 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
746 if (dad) { 748 if (dad) {
@@ -788,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
788 if (ipv6_chk_acast_addr(dev, &msg->target) || 790 if (ipv6_chk_acast_addr(dev, &msg->target) ||
789 (idev->cnf.forwarding && 791 (idev->cnf.forwarding &&
790 (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && 792 (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
791 (pneigh = pneigh_lookup(&nd_tbl, 793 (pneigh = pneigh_lookup(&nd_tbl, &init_net,
792 &msg->target, dev, 0)) != NULL)) { 794 &msg->target, dev, 0)) != NULL)) {
793 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && 795 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
794 skb->pkt_type != PACKET_HOST && 796 skb->pkt_type != PACKET_HOST &&
@@ -898,7 +900,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
898 return; 900 return;
899 } 901 }
900 } 902 }
901 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) { 903 if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) {
902 if (ifp->flags & IFA_F_TENTATIVE) { 904 if (ifp->flags & IFA_F_TENTATIVE) {
903 addrconf_dad_failure(ifp); 905 addrconf_dad_failure(ifp);
904 return; 906 return;
@@ -929,7 +931,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
929 */ 931 */
930 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && 932 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
931 ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && 933 ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
932 pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { 934 pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
933 /* XXX: idev->cnf.prixy_ndp */ 935 /* XXX: idev->cnf.prixy_ndp */
934 goto out; 936 goto out;
935 } 937 }
@@ -1037,6 +1039,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1037 1039
1038 ndmsg = nlmsg_data(nlh); 1040 ndmsg = nlmsg_data(nlh);
1039 ndmsg->nduseropt_family = AF_INET6; 1041 ndmsg->nduseropt_family = AF_INET6;
1042 ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1040 ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; 1043 ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1041 ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; 1044 ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1042 ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; 1045 ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
@@ -1047,7 +1050,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1047 &ipv6_hdr(ra)->saddr); 1050 &ipv6_hdr(ra)->saddr);
1048 nlmsg_end(skb, nlh); 1051 nlmsg_end(skb, nlh);
1049 1052
1050 err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); 1053 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL,
1054 GFP_ATOMIC);
1051 if (err < 0) 1055 if (err < 0)
1052 goto errout; 1056 goto errout;
1053 1057
@@ -1057,7 +1061,7 @@ nla_put_failure:
1057 nlmsg_free(skb); 1061 nlmsg_free(skb);
1058 err = -EMSGSIZE; 1062 err = -EMSGSIZE;
1059errout: 1063errout:
1060 rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); 1064 rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err);
1061} 1065}
1062 1066
1063static void ndisc_router_discovery(struct sk_buff *skb) 1067static void ndisc_router_discovery(struct sk_buff *skb)
@@ -1293,11 +1297,11 @@ skip_defrtr:
1293 } 1297 }
1294 1298
1295 if (ndopts.nd_useropts) { 1299 if (ndopts.nd_useropts) {
1296 struct nd_opt_hdr *opt; 1300 struct nd_opt_hdr *p;
1297 for (opt = ndopts.nd_useropts; 1301 for (p = ndopts.nd_useropts;
1298 opt; 1302 p;
1299 opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { 1303 p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1300 ndisc_ra_useropt(skb, opt); 1304 ndisc_ra_useropt(skb, p);
1301 } 1305 }
1302 } 1306 }
1303 1307
@@ -1537,7 +1541,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1537 buff->dst = dst; 1541 buff->dst = dst;
1538 idev = in6_dev_get(dst->dev); 1542 idev = in6_dev_get(dst->dev);
1539 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 1543 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1540 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); 1544 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1545 dst_output);
1541 if (!err) { 1546 if (!err) {
1542 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); 1547 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1543 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 1548 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
@@ -1670,7 +1675,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
1670 filp, buffer, lenp, ppos); 1675 filp, buffer, lenp, ppos);
1671 1676
1672 else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || 1677 else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1673 (strcmp(ctl->procname, "base_reacable_time_ms") == 0)) 1678 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1674 ret = proc_dointvec_ms_jiffies(ctl, write, 1679 ret = proc_dointvec_ms_jiffies(ctl, write,
1675 filp, buffer, lenp, ppos); 1680 filp, buffer, lenp, ppos);
1676 else 1681 else
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index b1326c2bf8aa..2e06724dc348 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -8,6 +8,7 @@
8#include <net/ip6_route.h> 8#include <net/ip6_route.h>
9#include <net/xfrm.h> 9#include <net/xfrm.h>
10#include <net/ip6_checksum.h> 10#include <net/ip6_checksum.h>
11#include <net/netfilter/nf_queue.h>
11 12
12int ip6_route_me_harder(struct sk_buff *skb) 13int ip6_route_me_harder(struct sk_buff *skb)
13{ 14{
@@ -56,11 +57,12 @@ struct ip6_rt_info {
56 struct in6_addr saddr; 57 struct in6_addr saddr;
57}; 58};
58 59
59static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) 60static void nf_ip6_saveroute(const struct sk_buff *skb,
61 struct nf_queue_entry *entry)
60{ 62{
61 struct ip6_rt_info *rt_info = nf_info_reroute(info); 63 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
62 64
63 if (info->hook == NF_IP6_LOCAL_OUT) { 65 if (entry->hook == NF_INET_LOCAL_OUT) {
64 struct ipv6hdr *iph = ipv6_hdr(skb); 66 struct ipv6hdr *iph = ipv6_hdr(skb);
65 67
66 rt_info->daddr = iph->daddr; 68 rt_info->daddr = iph->daddr;
@@ -68,11 +70,12 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
68 } 70 }
69} 71}
70 72
71static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) 73static int nf_ip6_reroute(struct sk_buff *skb,
74 const struct nf_queue_entry *entry)
72{ 75{
73 struct ip6_rt_info *rt_info = nf_info_reroute(info); 76 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
74 77
75 if (info->hook == NF_IP6_LOCAL_OUT) { 78 if (entry->hook == NF_INET_LOCAL_OUT) {
76 struct ipv6hdr *iph = ipv6_hdr(skb); 79 struct ipv6hdr *iph = ipv6_hdr(skb);
77 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 80 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
78 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) 81 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
@@ -81,6 +84,12 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
81 return 0; 84 return 0;
82} 85}
83 86
87static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
88{
89 *dst = ip6_route_output(NULL, fl);
90 return (*dst)->error;
91}
92
84__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 93__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
85 unsigned int dataoff, u_int8_t protocol) 94 unsigned int dataoff, u_int8_t protocol)
86{ 95{
@@ -89,7 +98,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
89 98
90 switch (skb->ip_summed) { 99 switch (skb->ip_summed) {
91 case CHECKSUM_COMPLETE: 100 case CHECKSUM_COMPLETE:
92 if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) 101 if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
93 break; 102 break;
94 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 103 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
95 skb->len - dataoff, protocol, 104 skb->len - dataoff, protocol,
@@ -115,9 +124,10 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
115 124
116EXPORT_SYMBOL(nf_ip6_checksum); 125EXPORT_SYMBOL(nf_ip6_checksum);
117 126
118static struct nf_afinfo nf_ip6_afinfo = { 127static const struct nf_afinfo nf_ip6_afinfo = {
119 .family = AF_INET6, 128 .family = AF_INET6,
120 .checksum = nf_ip6_checksum, 129 .checksum = nf_ip6_checksum,
130 .route = nf_ip6_route,
121 .saveroute = nf_ip6_saveroute, 131 .saveroute = nf_ip6_saveroute,
122 .reroute = nf_ip6_reroute, 132 .reroute = nf_ip6_reroute,
123 .route_key_size = sizeof(struct ip6_rt_info), 133 .route_key_size = sizeof(struct ip6_rt_info),
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 838b8ddee8c0..6cae5475737e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -2,12 +2,13 @@
2# IP netfilter configuration 2# IP netfilter configuration
3# 3#
4 4
5menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" 5menu "IPv6: Netfilter Configuration"
6 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL 6 depends on INET && IPV6 && NETFILTER
7 7
8config NF_CONNTRACK_IPV6 8config NF_CONNTRACK_IPV6
9 tristate "IPv6 connection tracking support (EXPERIMENTAL)" 9 tristate "IPv6 connection tracking support"
10 depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK 10 depends on INET && IPV6 && NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n
11 ---help--- 12 ---help---
12 Connection tracking keeps a record of what packets have passed 13 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related 14 through your machine, in order to figure out how they are related
@@ -21,7 +22,8 @@ config NF_CONNTRACK_IPV6
21 22
22config IP6_NF_QUEUE 23config IP6_NF_QUEUE
23 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" 24 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
24 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL 25 depends on INET && IPV6 && NETFILTER
26 depends on NETFILTER_ADVANCED
25 ---help--- 27 ---help---
26 28
27 This option adds a queue handler to the kernel for IPv6 29 This option adds a queue handler to the kernel for IPv6
@@ -42,8 +44,9 @@ config IP6_NF_QUEUE
42 44
43config IP6_NF_IPTABLES 45config IP6_NF_IPTABLES
44 tristate "IP6 tables support (required for filtering)" 46 tristate "IP6 tables support (required for filtering)"
45 depends on INET && IPV6 && EXPERIMENTAL 47 depends on INET && IPV6
46 select NETFILTER_XTABLES 48 select NETFILTER_XTABLES
49 default m if NETFILTER_ADVANCED=n
47 help 50 help
48 ip6tables is a general, extensible packet identification framework. 51 ip6tables is a general, extensible packet identification framework.
49 Currently only the packet filtering and packet mangling subsystem 52 Currently only the packet filtering and packet mangling subsystem
@@ -54,8 +57,9 @@ config IP6_NF_IPTABLES
54 57
55# The simple matches. 58# The simple matches.
56config IP6_NF_MATCH_RT 59config IP6_NF_MATCH_RT
57 tristate "Routing header match support" 60 tristate '"rt" Routing header match support'
58 depends on IP6_NF_IPTABLES 61 depends on IP6_NF_IPTABLES
62 depends on NETFILTER_ADVANCED
59 help 63 help
60 rt matching allows you to match packets based on the routing 64 rt matching allows you to match packets based on the routing
61 header of the packet. 65 header of the packet.
@@ -63,8 +67,9 @@ config IP6_NF_MATCH_RT
63 To compile it as a module, choose M here. If unsure, say N. 67 To compile it as a module, choose M here. If unsure, say N.
64 68
65config IP6_NF_MATCH_OPTS 69config IP6_NF_MATCH_OPTS
66 tristate "Hop-by-hop and Dst opts header match support" 70 tristate '"hopbyhop" and "dst" opts header match support'
67 depends on IP6_NF_IPTABLES 71 depends on IP6_NF_IPTABLES
72 depends on NETFILTER_ADVANCED
68 help 73 help
69 This allows one to match packets based on the hop-by-hop 74 This allows one to match packets based on the hop-by-hop
70 and destination options headers of a packet. 75 and destination options headers of a packet.
@@ -72,8 +77,9 @@ config IP6_NF_MATCH_OPTS
72 To compile it as a module, choose M here. If unsure, say N. 77 To compile it as a module, choose M here. If unsure, say N.
73 78
74config IP6_NF_MATCH_FRAG 79config IP6_NF_MATCH_FRAG
75 tristate "Fragmentation header match support" 80 tristate '"frag" Fragmentation header match support'
76 depends on IP6_NF_IPTABLES 81 depends on IP6_NF_IPTABLES
82 depends on NETFILTER_ADVANCED
77 help 83 help
78 frag matching allows you to match packets based on the fragmentation 84 frag matching allows you to match packets based on the fragmentation
79 header of the packet. 85 header of the packet.
@@ -81,26 +87,19 @@ config IP6_NF_MATCH_FRAG
81 To compile it as a module, choose M here. If unsure, say N. 87 To compile it as a module, choose M here. If unsure, say N.
82 88
83config IP6_NF_MATCH_HL 89config IP6_NF_MATCH_HL
84 tristate "HL match support" 90 tristate '"hl" match support'
85 depends on IP6_NF_IPTABLES 91 depends on IP6_NF_IPTABLES
92 depends on NETFILTER_ADVANCED
86 help 93 help
87 HL matching allows you to match packets based on the hop 94 HL matching allows you to match packets based on the hop
88 limit of the packet. 95 limit of the packet.
89 96
90 To compile it as a module, choose M here. If unsure, say N. 97 To compile it as a module, choose M here. If unsure, say N.
91 98
92config IP6_NF_MATCH_OWNER
93 tristate "Owner match support"
94 depends on IP6_NF_IPTABLES
95 help
96 Packet owner matching allows you to match locally-generated packets
97 based on who created them: the user, group, process or session.
98
99 To compile it as a module, choose M here. If unsure, say N.
100
101config IP6_NF_MATCH_IPV6HEADER 99config IP6_NF_MATCH_IPV6HEADER
102 tristate "IPv6 Extension Headers Match" 100 tristate '"ipv6header" IPv6 Extension Headers Match'
103 depends on IP6_NF_IPTABLES 101 depends on IP6_NF_IPTABLES
102 default m if NETFILTER_ADVANCED=n
104 help 103 help
105 This module allows one to match packets based upon 104 This module allows one to match packets based upon
106 the ipv6 extension headers. 105 the ipv6 extension headers.
@@ -108,24 +107,27 @@ config IP6_NF_MATCH_IPV6HEADER
108 To compile it as a module, choose M here. If unsure, say N. 107 To compile it as a module, choose M here. If unsure, say N.
109 108
110config IP6_NF_MATCH_AH 109config IP6_NF_MATCH_AH
111 tristate "AH match support" 110 tristate '"ah" match support'
112 depends on IP6_NF_IPTABLES 111 depends on IP6_NF_IPTABLES
112 depends on NETFILTER_ADVANCED
113 help 113 help
114 This module allows one to match AH packets. 114 This module allows one to match AH packets.
115 115
116 To compile it as a module, choose M here. If unsure, say N. 116 To compile it as a module, choose M here. If unsure, say N.
117 117
118config IP6_NF_MATCH_MH 118config IP6_NF_MATCH_MH
119 tristate "MH match support" 119 tristate '"mh" match support'
120 depends on IP6_NF_IPTABLES 120 depends on IP6_NF_IPTABLES
121 depends on NETFILTER_ADVANCED
121 help 122 help
122 This module allows one to match MH packets. 123 This module allows one to match MH packets.
123 124
124 To compile it as a module, choose M here. If unsure, say N. 125 To compile it as a module, choose M here. If unsure, say N.
125 126
126config IP6_NF_MATCH_EUI64 127config IP6_NF_MATCH_EUI64
127 tristate "EUI64 address check" 128 tristate '"eui64" address check'
128 depends on IP6_NF_IPTABLES 129 depends on IP6_NF_IPTABLES
130 depends on NETFILTER_ADVANCED
129 help 131 help
130 This module performs checking on the IPv6 source address 132 This module performs checking on the IPv6 source address
131 Compares the last 64 bits with the EUI64 (delivered 133 Compares the last 64 bits with the EUI64 (delivered
@@ -137,6 +139,7 @@ config IP6_NF_MATCH_EUI64
137config IP6_NF_FILTER 139config IP6_NF_FILTER
138 tristate "Packet filtering" 140 tristate "Packet filtering"
139 depends on IP6_NF_IPTABLES 141 depends on IP6_NF_IPTABLES
142 default m if NETFILTER_ADVANCED=n
140 help 143 help
141 Packet filtering defines a table `filter', which has a series of 144 Packet filtering defines a table `filter', which has a series of
142 rules for simple packet filtering at local input, forwarding and 145 rules for simple packet filtering at local input, forwarding and
@@ -147,6 +150,7 @@ config IP6_NF_FILTER
147config IP6_NF_TARGET_LOG 150config IP6_NF_TARGET_LOG
148 tristate "LOG target support" 151 tristate "LOG target support"
149 depends on IP6_NF_FILTER 152 depends on IP6_NF_FILTER
153 default m if NETFILTER_ADVANCED=n
150 help 154 help
151 This option adds a `LOG' target, which allows you to create rules in 155 This option adds a `LOG' target, which allows you to create rules in
152 any iptables table which records the packet header to the syslog. 156 any iptables table which records the packet header to the syslog.
@@ -156,6 +160,7 @@ config IP6_NF_TARGET_LOG
156config IP6_NF_TARGET_REJECT 160config IP6_NF_TARGET_REJECT
157 tristate "REJECT target support" 161 tristate "REJECT target support"
158 depends on IP6_NF_FILTER 162 depends on IP6_NF_FILTER
163 default m if NETFILTER_ADVANCED=n
159 help 164 help
160 The REJECT target allows a filtering rule to specify that an ICMPv6 165 The REJECT target allows a filtering rule to specify that an ICMPv6
161 error should be issued in response to an incoming packet, rather 166 error should be issued in response to an incoming packet, rather
@@ -166,6 +171,7 @@ config IP6_NF_TARGET_REJECT
166config IP6_NF_MANGLE 171config IP6_NF_MANGLE
167 tristate "Packet mangling" 172 tristate "Packet mangling"
168 depends on IP6_NF_IPTABLES 173 depends on IP6_NF_IPTABLES
174 default m if NETFILTER_ADVANCED=n
169 help 175 help
170 This option adds a `mangle' table to iptables: see the man page for 176 This option adds a `mangle' table to iptables: see the man page for
171 iptables(8). This table is used for various packet alterations 177 iptables(8). This table is used for various packet alterations
@@ -176,27 +182,29 @@ config IP6_NF_MANGLE
176config IP6_NF_TARGET_HL 182config IP6_NF_TARGET_HL
177 tristate 'HL (hoplimit) target support' 183 tristate 'HL (hoplimit) target support'
178 depends on IP6_NF_MANGLE 184 depends on IP6_NF_MANGLE
185 depends on NETFILTER_ADVANCED
179 help 186 help
180 This option adds a `HL' target, which enables the user to decrement 187 This option adds a `HL' target, which enables the user to decrement
181 the hoplimit value of the IPv6 header or set it to a given (lower) 188 the hoplimit value of the IPv6 header or set it to a given (lower)
182 value. 189 value.
183 190
184 While it is safe to decrement the hoplimit value, this option also 191 While it is safe to decrement the hoplimit value, this option also
185 enables functionality to increment and set the hoplimit value of the 192 enables functionality to increment and set the hoplimit value of the
186 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since 193 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since
187 you can easily create immortal packets that loop forever on the 194 you can easily create immortal packets that loop forever on the
188 network. 195 network.
189 196
190 To compile it as a module, choose M here. If unsure, say N. 197 To compile it as a module, choose M here. If unsure, say N.
191 198
192config IP6_NF_RAW 199config IP6_NF_RAW
193 tristate 'raw table support (required for TRACE)' 200 tristate 'raw table support (required for TRACE)'
194 depends on IP6_NF_IPTABLES 201 depends on IP6_NF_IPTABLES
202 depends on NETFILTER_ADVANCED
195 help 203 help
196 This option adds a `raw' table to ip6tables. This table is the very 204 This option adds a `raw' table to ip6tables. This table is the very
197 first in the netfilter framework and hooks in at the PREROUTING 205 first in the netfilter framework and hooks in at the PREROUTING
198 and OUTPUT chains. 206 and OUTPUT chains.
199 207
200 If you want to compile it as a module, say M here and read 208 If you want to compile it as a module, say M here and read
201 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 209 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
202 210
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 4513eab77397..fbf2c14ed887 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,25 +4,28 @@
4 4
5# Link order matters here. 5# Link order matters here.
6obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o 6obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
7obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
8obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
9obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
10obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
11obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
12obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
13obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
14obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o 7obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
15obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o 8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
16obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
17obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o 9obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
18obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
19obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 10obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
20obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
21obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
22obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
23 11
24# objects for l3 independent conntrack 12# objects for l3 independent conntrack
25nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o 13nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
26 14
27# l3 independent conntrack 15# l3 independent conntrack
28obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 16obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
17
18# matches
19obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
20obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
21obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
22obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
23obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
24obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
25obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
26obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
27
28# targets
29obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
30obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
31obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 6413a30d9f68..e869916b05f1 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -23,11 +23,13 @@
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <linux/proc_fs.h> 25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
26#include <linux/mutex.h> 27#include <linux/mutex.h>
27#include <net/net_namespace.h> 28#include <net/net_namespace.h>
28#include <net/sock.h> 29#include <net/sock.h>
29#include <net/ipv6.h> 30#include <net/ipv6.h>
30#include <net/ip6_route.h> 31#include <net/ip6_route.h>
32#include <net/netfilter/nf_queue.h>
31#include <linux/netfilter_ipv4/ip_queue.h> 33#include <linux/netfilter_ipv4/ip_queue.h>
32#include <linux/netfilter_ipv4/ip_tables.h> 34#include <linux/netfilter_ipv4/ip_tables.h>
33#include <linux/netfilter_ipv6/ip6_tables.h> 35#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -37,13 +39,7 @@
37#define NET_IPQ_QMAX 2088 39#define NET_IPQ_QMAX 2088
38#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" 40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
39 41
40struct ipq_queue_entry { 42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
41 struct list_head list;
42 struct nf_info *info;
43 struct sk_buff *skb;
44};
45
46typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
47 43
48static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
49static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -57,70 +53,13 @@ static struct sock *ipqnl __read_mostly;
57static LIST_HEAD(queue_list); 53static LIST_HEAD(queue_list);
58static DEFINE_MUTEX(ipqnl_mutex); 54static DEFINE_MUTEX(ipqnl_mutex);
59 55
60static void
61ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
62{
63 local_bh_disable();
64 nf_reinject(entry->skb, entry->info, verdict);
65 local_bh_enable();
66 kfree(entry);
67}
68
69static inline void 56static inline void
70__ipq_enqueue_entry(struct ipq_queue_entry *entry) 57__ipq_enqueue_entry(struct nf_queue_entry *entry)
71{ 58{
72 list_add(&entry->list, &queue_list); 59 list_add_tail(&entry->list, &queue_list);
73 queue_total++; 60 queue_total++;
74} 61}
75 62
76/*
77 * Find and return a queued entry matched by cmpfn, or return the last
78 * entry if cmpfn is NULL.
79 */
80static inline struct ipq_queue_entry *
81__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
82{
83 struct list_head *p;
84
85 list_for_each_prev(p, &queue_list) {
86 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
87
88 if (!cmpfn || cmpfn(entry, data))
89 return entry;
90 }
91 return NULL;
92}
93
94static inline void
95__ipq_dequeue_entry(struct ipq_queue_entry *entry)
96{
97 list_del(&entry->list);
98 queue_total--;
99}
100
101static inline struct ipq_queue_entry *
102__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
103{
104 struct ipq_queue_entry *entry;
105
106 entry = __ipq_find_entry(cmpfn, data);
107 if (entry == NULL)
108 return NULL;
109
110 __ipq_dequeue_entry(entry);
111 return entry;
112}
113
114
115static inline void
116__ipq_flush(int verdict)
117{
118 struct ipq_queue_entry *entry;
119
120 while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
121 ipq_issue_verdict(entry, verdict);
122}
123
124static inline int 63static inline int
125__ipq_set_mode(unsigned char mode, unsigned int range) 64__ipq_set_mode(unsigned char mode, unsigned int range)
126{ 65{
@@ -147,36 +86,64 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
147 return status; 86 return status;
148} 87}
149 88
89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
90
150static inline void 91static inline void
151__ipq_reset(void) 92__ipq_reset(void)
152{ 93{
153 peer_pid = 0; 94 peer_pid = 0;
154 net_disable_timestamp(); 95 net_disable_timestamp();
155 __ipq_set_mode(IPQ_COPY_NONE, 0); 96 __ipq_set_mode(IPQ_COPY_NONE, 0);
156 __ipq_flush(NF_DROP); 97 __ipq_flush(NULL, 0);
157} 98}
158 99
159static struct ipq_queue_entry * 100static struct nf_queue_entry *
160ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) 101ipq_find_dequeue_entry(unsigned long id)
161{ 102{
162 struct ipq_queue_entry *entry; 103 struct nf_queue_entry *entry = NULL, *i;
163 104
164 write_lock_bh(&queue_lock); 105 write_lock_bh(&queue_lock);
165 entry = __ipq_find_dequeue_entry(cmpfn, data); 106
107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) {
109 entry = i;
110 break;
111 }
112 }
113
114 if (entry) {
115 list_del(&entry->list);
116 queue_total--;
117 }
118
166 write_unlock_bh(&queue_lock); 119 write_unlock_bh(&queue_lock);
167 return entry; 120 return entry;
168} 121}
169 122
170static void 123static void
171ipq_flush(int verdict) 124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
125{
126 struct nf_queue_entry *entry, *next;
127
128 list_for_each_entry_safe(entry, next, &queue_list, list) {
129 if (!cmpfn || cmpfn(entry, data)) {
130 list_del(&entry->list);
131 queue_total--;
132 nf_reinject(entry, NF_DROP);
133 }
134 }
135}
136
137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
172{ 139{
173 write_lock_bh(&queue_lock); 140 write_lock_bh(&queue_lock);
174 __ipq_flush(verdict); 141 __ipq_flush(cmpfn, data);
175 write_unlock_bh(&queue_lock); 142 write_unlock_bh(&queue_lock);
176} 143}
177 144
178static struct sk_buff * 145static struct sk_buff *
179ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) 146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
180{ 147{
181 sk_buff_data_t old_tail; 148 sk_buff_data_t old_tail;
182 size_t size = 0; 149 size_t size = 0;
@@ -233,20 +200,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
233 pmsg->timestamp_sec = tv.tv_sec; 200 pmsg->timestamp_sec = tv.tv_sec;
234 pmsg->timestamp_usec = tv.tv_usec; 201 pmsg->timestamp_usec = tv.tv_usec;
235 pmsg->mark = entry->skb->mark; 202 pmsg->mark = entry->skb->mark;
236 pmsg->hook = entry->info->hook; 203 pmsg->hook = entry->hook;
237 pmsg->hw_protocol = entry->skb->protocol; 204 pmsg->hw_protocol = entry->skb->protocol;
238 205
239 if (entry->info->indev) 206 if (entry->indev)
240 strcpy(pmsg->indev_name, entry->info->indev->name); 207 strcpy(pmsg->indev_name, entry->indev->name);
241 else 208 else
242 pmsg->indev_name[0] = '\0'; 209 pmsg->indev_name[0] = '\0';
243 210
244 if (entry->info->outdev) 211 if (entry->outdev)
245 strcpy(pmsg->outdev_name, entry->info->outdev->name); 212 strcpy(pmsg->outdev_name, entry->outdev->name);
246 else 213 else
247 pmsg->outdev_name[0] = '\0'; 214 pmsg->outdev_name[0] = '\0';
248 215
249 if (entry->info->indev && entry->skb->dev) { 216 if (entry->indev && entry->skb->dev) {
250 pmsg->hw_type = entry->skb->dev->type; 217 pmsg->hw_type = entry->skb->dev->type;
251 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); 218 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
252 } 219 }
@@ -267,28 +234,17 @@ nlmsg_failure:
267} 234}
268 235
269static int 236static int
270ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 237ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
271 unsigned int queuenum, void *data)
272{ 238{
273 int status = -EINVAL; 239 int status = -EINVAL;
274 struct sk_buff *nskb; 240 struct sk_buff *nskb;
275 struct ipq_queue_entry *entry;
276 241
277 if (copy_mode == IPQ_COPY_NONE) 242 if (copy_mode == IPQ_COPY_NONE)
278 return -EAGAIN; 243 return -EAGAIN;
279 244
280 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
281 if (entry == NULL) {
282 printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
283 return -ENOMEM;
284 }
285
286 entry->info = info;
287 entry->skb = skb;
288
289 nskb = ipq_build_packet_message(entry, &status); 245 nskb = ipq_build_packet_message(entry, &status);
290 if (nskb == NULL) 246 if (nskb == NULL)
291 goto err_out_free; 247 return status;
292 248
293 write_lock_bh(&queue_lock); 249 write_lock_bh(&queue_lock);
294 250
@@ -322,14 +278,11 @@ err_out_free_nskb:
322 278
323err_out_unlock: 279err_out_unlock:
324 write_unlock_bh(&queue_lock); 280 write_unlock_bh(&queue_lock);
325
326err_out_free:
327 kfree(entry);
328 return status; 281 return status;
329} 282}
330 283
331static int 284static int
332ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) 285ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
333{ 286{
334 int diff; 287 int diff;
335 int err; 288 int err;
@@ -364,21 +317,15 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
364 return 0; 317 return 0;
365} 318}
366 319
367static inline int
368id_cmp(struct ipq_queue_entry *e, unsigned long id)
369{
370 return (id == (unsigned long )e);
371}
372
373static int 320static int
374ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 321ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
375{ 322{
376 struct ipq_queue_entry *entry; 323 struct nf_queue_entry *entry;
377 324
378 if (vmsg->value > NF_MAX_VERDICT) 325 if (vmsg->value > NF_MAX_VERDICT)
379 return -EINVAL; 326 return -EINVAL;
380 327
381 entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); 328 entry = ipq_find_dequeue_entry(vmsg->id);
382 if (entry == NULL) 329 if (entry == NULL)
383 return -ENOENT; 330 return -ENOENT;
384 else { 331 else {
@@ -388,7 +335,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
388 if (ipq_mangle_ipv6(vmsg, entry) < 0) 335 if (ipq_mangle_ipv6(vmsg, entry) < 0)
389 verdict = NF_DROP; 336 verdict = NF_DROP;
390 337
391 ipq_issue_verdict(entry, verdict); 338 nf_reinject(entry, verdict);
392 return 0; 339 return 0;
393 } 340 }
394} 341}
@@ -433,26 +380,32 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
433} 380}
434 381
435static int 382static int
436dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) 383dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
437{ 384{
438 if (entry->info->indev) 385 if (entry->indev)
439 if (entry->info->indev->ifindex == ifindex) 386 if (entry->indev->ifindex == ifindex)
440 return 1; 387 return 1;
441 388
442 if (entry->info->outdev) 389 if (entry->outdev)
443 if (entry->info->outdev->ifindex == ifindex) 390 if (entry->outdev->ifindex == ifindex)
444 return 1; 391 return 1;
445 392#ifdef CONFIG_BRIDGE_NETFILTER
393 if (entry->skb->nf_bridge) {
394 if (entry->skb->nf_bridge->physindev &&
395 entry->skb->nf_bridge->physindev->ifindex == ifindex)
396 return 1;
397 if (entry->skb->nf_bridge->physoutdev &&
398 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
399 return 1;
400 }
401#endif
446 return 0; 402 return 0;
447} 403}
448 404
449static void 405static void
450ipq_dev_drop(int ifindex) 406ipq_dev_drop(int ifindex)
451{ 407{
452 struct ipq_queue_entry *entry; 408 ipq_flush(dev_cmp, ifindex);
453
454 while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
455 ipq_issue_verdict(entry, NF_DROP);
456} 409}
457 410
458#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 411#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -562,6 +515,7 @@ static struct notifier_block ipq_nl_notifier = {
562 .notifier_call = ipq_rcv_nl_event, 515 .notifier_call = ipq_rcv_nl_event,
563}; 516};
564 517
518#ifdef CONFIG_SYSCTL
565static struct ctl_table_header *ipq_sysctl_header; 519static struct ctl_table_header *ipq_sysctl_header;
566 520
567static ctl_table ipq_table[] = { 521static ctl_table ipq_table[] = {
@@ -575,36 +529,14 @@ static ctl_table ipq_table[] = {
575 }, 529 },
576 { .ctl_name = 0 } 530 { .ctl_name = 0 }
577}; 531};
578 532#endif
579static ctl_table ipq_dir_table[] = {
580 {
581 .ctl_name = NET_IPV6,
582 .procname = "ipv6",
583 .mode = 0555,
584 .child = ipq_table
585 },
586 { .ctl_name = 0 }
587};
588
589static ctl_table ipq_root_table[] = {
590 {
591 .ctl_name = CTL_NET,
592 .procname = "net",
593 .mode = 0555,
594 .child = ipq_dir_table
595 },
596 { .ctl_name = 0 }
597};
598 533
599#ifdef CONFIG_PROC_FS 534#ifdef CONFIG_PROC_FS
600static int 535static int ip6_queue_show(struct seq_file *m, void *v)
601ipq_get_info(char *buffer, char **start, off_t offset, int length)
602{ 536{
603 int len;
604
605 read_lock_bh(&queue_lock); 537 read_lock_bh(&queue_lock);
606 538
607 len = sprintf(buffer, 539 seq_printf(m,
608 "Peer PID : %d\n" 540 "Peer PID : %d\n"
609 "Copy mode : %hu\n" 541 "Copy mode : %hu\n"
610 "Copy range : %u\n" 542 "Copy range : %u\n"
@@ -621,18 +553,24 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
621 queue_user_dropped); 553 queue_user_dropped);
622 554
623 read_unlock_bh(&queue_lock); 555 read_unlock_bh(&queue_lock);
556 return 0;
557}
624 558
625 *start = buffer + offset; 559static int ip6_queue_open(struct inode *inode, struct file *file)
626 len -= offset; 560{
627 if (len > length) 561 return single_open(file, ip6_queue_show, NULL);
628 len = length;
629 else if (len < 0)
630 len = 0;
631 return len;
632} 562}
633#endif /* CONFIG_PROC_FS */
634 563
635static struct nf_queue_handler nfqh = { 564static const struct file_operations ip6_queue_proc_fops = {
565 .open = ip6_queue_open,
566 .read = seq_read,
567 .llseek = seq_lseek,
568 .release = single_release,
569 .owner = THIS_MODULE,
570};
571#endif
572
573static const struct nf_queue_handler nfqh = {
636 .name = "ip6_queue", 574 .name = "ip6_queue",
637 .outfn = &ipq_enqueue_packet, 575 .outfn = &ipq_enqueue_packet,
638}; 576};
@@ -640,7 +578,7 @@ static struct nf_queue_handler nfqh = {
640static int __init ip6_queue_init(void) 578static int __init ip6_queue_init(void)
641{ 579{
642 int status = -ENOMEM; 580 int status = -ENOMEM;
643 struct proc_dir_entry *proc; 581 struct proc_dir_entry *proc __maybe_unused;
644 582
645 netlink_register_notifier(&ipq_nl_notifier); 583 netlink_register_notifier(&ipq_nl_notifier);
646 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, 584 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
@@ -650,17 +588,20 @@ static int __init ip6_queue_init(void)
650 goto cleanup_netlink_notifier; 588 goto cleanup_netlink_notifier;
651 } 589 }
652 590
653 proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); 591#ifdef CONFIG_PROC_FS
654 if (proc) 592 proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
593 if (proc) {
655 proc->owner = THIS_MODULE; 594 proc->owner = THIS_MODULE;
656 else { 595 proc->proc_fops = &ip6_queue_proc_fops;
596 } else {
657 printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); 597 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
658 goto cleanup_ipqnl; 598 goto cleanup_ipqnl;
659 } 599 }
660 600#endif
661 register_netdevice_notifier(&ipq_dev_notifier); 601 register_netdevice_notifier(&ipq_dev_notifier);
662 ipq_sysctl_header = register_sysctl_table(ipq_root_table); 602#ifdef CONFIG_SYSCTL
663 603 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
604#endif
664 status = nf_register_queue_handler(PF_INET6, &nfqh); 605 status = nf_register_queue_handler(PF_INET6, &nfqh);
665 if (status < 0) { 606 if (status < 0) {
666 printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); 607 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
@@ -669,12 +610,14 @@ static int __init ip6_queue_init(void)
669 return status; 610 return status;
670 611
671cleanup_sysctl: 612cleanup_sysctl:
613#ifdef CONFIG_SYSCTL
672 unregister_sysctl_table(ipq_sysctl_header); 614 unregister_sysctl_table(ipq_sysctl_header);
615#endif
673 unregister_netdevice_notifier(&ipq_dev_notifier); 616 unregister_netdevice_notifier(&ipq_dev_notifier);
674 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 617 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
675 618
676cleanup_ipqnl: 619cleanup_ipqnl: __maybe_unused
677 sock_release(ipqnl->sk_socket); 620 netlink_kernel_release(ipqnl);
678 mutex_lock(&ipqnl_mutex); 621 mutex_lock(&ipqnl_mutex);
679 mutex_unlock(&ipqnl_mutex); 622 mutex_unlock(&ipqnl_mutex);
680 623
@@ -687,13 +630,15 @@ static void __exit ip6_queue_fini(void)
687{ 630{
688 nf_unregister_queue_handlers(&nfqh); 631 nf_unregister_queue_handlers(&nfqh);
689 synchronize_net(); 632 synchronize_net();
690 ipq_flush(NF_DROP); 633 ipq_flush(NULL, 0);
691 634
635#ifdef CONFIG_SYSCTL
692 unregister_sysctl_table(ipq_sysctl_header); 636 unregister_sysctl_table(ipq_sysctl_header);
637#endif
693 unregister_netdevice_notifier(&ipq_dev_notifier); 638 unregister_netdevice_notifier(&ipq_dev_notifier);
694 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 639 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
695 640
696 sock_release(ipqnl->sk_socket); 641 netlink_kernel_release(ipqnl);
697 mutex_lock(&ipqnl_mutex); 642 mutex_lock(&ipqnl_mutex);
698 mutex_unlock(&ipqnl_mutex); 643 mutex_unlock(&ipqnl_mutex);
699 644
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index acaba1537931..bf9bb6e55bb5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -19,21 +19,21 @@
19#include <linux/poison.h> 19#include <linux/poison.h>
20#include <linux/icmpv6.h> 20#include <linux/icmpv6.h>
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/compat.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23#include <linux/mutex.h> 24#include <linux/mutex.h>
24#include <linux/proc_fs.h> 25#include <linux/proc_fs.h>
26#include <linux/err.h>
25#include <linux/cpumask.h> 27#include <linux/cpumask.h>
26 28
27#include <linux/netfilter_ipv6/ip6_tables.h> 29#include <linux/netfilter_ipv6/ip6_tables.h>
28#include <linux/netfilter/x_tables.h> 30#include <linux/netfilter/x_tables.h>
31#include <net/netfilter/nf_log.h>
29 32
30MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 34MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
32MODULE_DESCRIPTION("IPv6 packet filter"); 35MODULE_DESCRIPTION("IPv6 packet filter");
33 36
34#define IPV6_HDR_LEN (sizeof(struct ipv6hdr))
35#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
36
37/*#define DEBUG_IP_FIREWALL*/ 37/*#define DEBUG_IP_FIREWALL*/
38/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ 38/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39/*#define DEBUG_IP_FIREWALL_USER*/ 39/*#define DEBUG_IP_FIREWALL_USER*/
@@ -76,12 +76,6 @@ do { \
76 76
77 Hence the start of any table is given by get_table() below. */ 77 Hence the start of any table is given by get_table() below. */
78 78
79#if 0
80#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
81#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
82#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
83#endif
84
85/* Check for an extension */ 79/* Check for an extension */
86int 80int
87ip6t_ext_hdr(u8 nexthdr) 81ip6t_ext_hdr(u8 nexthdr)
@@ -96,6 +90,7 @@ ip6t_ext_hdr(u8 nexthdr)
96} 90}
97 91
98/* Returns whether matches rule or not. */ 92/* Returns whether matches rule or not. */
93/* Performance critical - called for every packet */
99static inline bool 94static inline bool
100ip6_packet_match(const struct sk_buff *skb, 95ip6_packet_match(const struct sk_buff *skb,
101 const char *indev, 96 const char *indev,
@@ -108,7 +103,7 @@ ip6_packet_match(const struct sk_buff *skb,
108 unsigned long ret; 103 unsigned long ret;
109 const struct ipv6hdr *ipv6 = ipv6_hdr(skb); 104 const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
110 105
111#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) 106#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
112 107
113 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, 108 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
114 &ip6info->src), IP6T_INV_SRCIP) 109 &ip6info->src), IP6T_INV_SRCIP)
@@ -188,7 +183,7 @@ ip6_packet_match(const struct sk_buff *skb,
188} 183}
189 184
190/* should be ip6 safe */ 185/* should be ip6 safe */
191static inline bool 186static bool
192ip6_checkentry(const struct ip6t_ip6 *ipv6) 187ip6_checkentry(const struct ip6t_ip6 *ipv6)
193{ 188{
194 if (ipv6->flags & ~IP6T_F_MASK) { 189 if (ipv6->flags & ~IP6T_F_MASK) {
@@ -218,8 +213,9 @@ ip6t_error(struct sk_buff *skb,
218 return NF_DROP; 213 return NF_DROP;
219} 214}
220 215
221static inline 216/* Performance critical - called for every packet */
222bool do_match(struct ip6t_entry_match *m, 217static inline bool
218do_match(struct ip6t_entry_match *m,
223 const struct sk_buff *skb, 219 const struct sk_buff *skb,
224 const struct net_device *in, 220 const struct net_device *in,
225 const struct net_device *out, 221 const struct net_device *out,
@@ -242,6 +238,7 @@ get_entry(void *base, unsigned int offset)
242} 238}
243 239
244/* All zeroes == unconditional rule. */ 240/* All zeroes == unconditional rule. */
241/* Mildly perf critical (only if packet tracing is on) */
245static inline int 242static inline int
246unconditional(const struct ip6t_ip6 *ipv6) 243unconditional(const struct ip6t_ip6 *ipv6)
247{ 244{
@@ -257,12 +254,12 @@ unconditional(const struct ip6t_ip6 *ipv6)
257#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 254#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
258 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 255 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
259/* This cries for unification! */ 256/* This cries for unification! */
260static const char *hooknames[] = { 257static const char *const hooknames[] = {
261 [NF_IP6_PRE_ROUTING] = "PREROUTING", 258 [NF_INET_PRE_ROUTING] = "PREROUTING",
262 [NF_IP6_LOCAL_IN] = "INPUT", 259 [NF_INET_LOCAL_IN] = "INPUT",
263 [NF_IP6_FORWARD] = "FORWARD", 260 [NF_INET_FORWARD] = "FORWARD",
264 [NF_IP6_LOCAL_OUT] = "OUTPUT", 261 [NF_INET_LOCAL_OUT] = "OUTPUT",
265 [NF_IP6_POST_ROUTING] = "POSTROUTING", 262 [NF_INET_POST_ROUTING] = "POSTROUTING",
266}; 263};
267 264
268enum nf_ip_trace_comments { 265enum nf_ip_trace_comments {
@@ -271,7 +268,7 @@ enum nf_ip_trace_comments {
271 NF_IP6_TRACE_COMMENT_POLICY, 268 NF_IP6_TRACE_COMMENT_POLICY,
272}; 269};
273 270
274static const char *comments[] = { 271static const char *const comments[] = {
275 [NF_IP6_TRACE_COMMENT_RULE] = "rule", 272 [NF_IP6_TRACE_COMMENT_RULE] = "rule",
276 [NF_IP6_TRACE_COMMENT_RETURN] = "return", 273 [NF_IP6_TRACE_COMMENT_RETURN] = "return",
277 [NF_IP6_TRACE_COMMENT_POLICY] = "policy", 274 [NF_IP6_TRACE_COMMENT_POLICY] = "policy",
@@ -287,6 +284,7 @@ static struct nf_loginfo trace_loginfo = {
287 }, 284 },
288}; 285};
289 286
287/* Mildly perf critical (only if packet tracing is on) */
290static inline int 288static inline int
291get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, 289get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
292 char *hookname, char **chainname, 290 char *hookname, char **chainname,
@@ -322,7 +320,7 @@ static void trace_packet(struct sk_buff *skb,
322 unsigned int hook, 320 unsigned int hook,
323 const struct net_device *in, 321 const struct net_device *in,
324 const struct net_device *out, 322 const struct net_device *out,
325 char *tablename, 323 const char *tablename,
326 struct xt_table_info *private, 324 struct xt_table_info *private,
327 struct ip6t_entry *e) 325 struct ip6t_entry *e)
328{ 326{
@@ -378,8 +376,8 @@ ip6t_do_table(struct sk_buff *skb,
378 * match it. */ 376 * match it. */
379 377
380 read_lock_bh(&table->lock); 378 read_lock_bh(&table->lock);
381 private = table->private;
382 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 379 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
380 private = table->private;
383 table_base = (void *)private->entries[smp_processor_id()]; 381 table_base = (void *)private->entries[smp_processor_id()];
384 e = get_entry(table_base, private->hook_entry[hook]); 382 e = get_entry(table_base, private->hook_entry[hook]);
385 383
@@ -399,9 +397,8 @@ ip6t_do_table(struct sk_buff *skb,
399 goto no_match; 397 goto no_match;
400 398
401 ADD_COUNTER(e->counters, 399 ADD_COUNTER(e->counters,
402 ntohs(ipv6_hdr(skb)->payload_len) 400 ntohs(ipv6_hdr(skb)->payload_len) +
403 + IPV6_HDR_LEN, 401 sizeof(struct ipv6hdr), 1);
404 1);
405 402
406 t = ip6t_get_target(e); 403 t = ip6t_get_target(e);
407 IP_NF_ASSERT(t->u.kernel.target); 404 IP_NF_ASSERT(t->u.kernel.target);
@@ -502,11 +499,9 @@ mark_source_chains(struct xt_table_info *newinfo,
502 499
503 /* No recursion; use packet counter to save back ptrs (reset 500 /* No recursion; use packet counter to save back ptrs (reset
504 to 0 as we leave), and comefrom to save source hook bitmask */ 501 to 0 as we leave), and comefrom to save source hook bitmask */
505 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { 502 for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
506 unsigned int pos = newinfo->hook_entry[hook]; 503 unsigned int pos = newinfo->hook_entry[hook];
507 struct ip6t_entry *e 504 struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
508 = (struct ip6t_entry *)(entry0 + pos);
509 int visited = e->comefrom & (1 << hook);
510 505
511 if (!(valid_hooks & (1 << hook))) 506 if (!(valid_hooks & (1 << hook)))
512 continue; 507 continue;
@@ -517,14 +512,14 @@ mark_source_chains(struct xt_table_info *newinfo,
517 for (;;) { 512 for (;;) {
518 struct ip6t_standard_target *t 513 struct ip6t_standard_target *t
519 = (void *)ip6t_get_target(e); 514 = (void *)ip6t_get_target(e);
515 int visited = e->comefrom & (1 << hook);
520 516
521 if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) { 517 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
522 printk("iptables: loop hook %u pos %u %08X.\n", 518 printk("iptables: loop hook %u pos %u %08X.\n",
523 hook, pos, e->comefrom); 519 hook, pos, e->comefrom);
524 return 0; 520 return 0;
525 } 521 }
526 e->comefrom 522 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
527 |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
528 523
529 /* Unconditional return/END. */ 524 /* Unconditional return/END. */
530 if ((e->target_offset == sizeof(struct ip6t_entry) 525 if ((e->target_offset == sizeof(struct ip6t_entry)
@@ -544,10 +539,10 @@ mark_source_chains(struct xt_table_info *newinfo,
544 /* Return: backtrack through the last 539 /* Return: backtrack through the last
545 big jump. */ 540 big jump. */
546 do { 541 do {
547 e->comefrom ^= (1<<NF_IP6_NUMHOOKS); 542 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
548#ifdef DEBUG_IP_FIREWALL_USER 543#ifdef DEBUG_IP_FIREWALL_USER
549 if (e->comefrom 544 if (e->comefrom
550 & (1 << NF_IP6_NUMHOOKS)) { 545 & (1 << NF_INET_NUMHOOKS)) {
551 duprintf("Back unset " 546 duprintf("Back unset "
552 "on hook %u " 547 "on hook %u "
553 "rule %u\n", 548 "rule %u\n",
@@ -604,7 +599,7 @@ mark_source_chains(struct xt_table_info *newinfo,
604 return 1; 599 return 1;
605} 600}
606 601
607static inline int 602static int
608cleanup_match(struct ip6t_entry_match *m, unsigned int *i) 603cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
609{ 604{
610 if (i && (*i)-- == 0) 605 if (i && (*i)-- == 0)
@@ -616,102 +611,135 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
616 return 0; 611 return 0;
617} 612}
618 613
619static inline int 614static int
620check_match(struct ip6t_entry_match *m, 615check_entry(struct ip6t_entry *e, const char *name)
621 const char *name, 616{
622 const struct ip6t_ip6 *ipv6, 617 struct ip6t_entry_target *t;
623 unsigned int hookmask, 618
624 unsigned int *i) 619 if (!ip6_checkentry(&e->ipv6)) {
620 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
621 return -EINVAL;
622 }
623
624 if (e->target_offset + sizeof(struct ip6t_entry_target) >
625 e->next_offset)
626 return -EINVAL;
627
628 t = ip6t_get_target(e);
629 if (e->target_offset + t->u.target_size > e->next_offset)
630 return -EINVAL;
631
632 return 0;
633}
634
635static int check_match(struct ip6t_entry_match *m, const char *name,
636 const struct ip6t_ip6 *ipv6,
637 unsigned int hookmask, unsigned int *i)
638{
639 struct xt_match *match;
640 int ret;
641
642 match = m->u.kernel.match;
643 ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
644 name, hookmask, ipv6->proto,
645 ipv6->invflags & IP6T_INV_PROTO);
646 if (!ret && m->u.kernel.match->checkentry
647 && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
648 hookmask)) {
649 duprintf("ip_tables: check failed for `%s'.\n",
650 m->u.kernel.match->name);
651 ret = -EINVAL;
652 }
653 if (!ret)
654 (*i)++;
655 return ret;
656}
657
658static int
659find_check_match(struct ip6t_entry_match *m,
660 const char *name,
661 const struct ip6t_ip6 *ipv6,
662 unsigned int hookmask,
663 unsigned int *i)
625{ 664{
626 struct xt_match *match; 665 struct xt_match *match;
627 int ret; 666 int ret;
628 667
629 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, 668 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
630 m->u.user.revision), 669 m->u.user.revision),
631 "ip6t_%s", m->u.user.name); 670 "ip6t_%s", m->u.user.name);
632 if (IS_ERR(match) || !match) { 671 if (IS_ERR(match) || !match) {
633 duprintf("check_match: `%s' not found\n", m->u.user.name); 672 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
634 return match ? PTR_ERR(match) : -ENOENT; 673 return match ? PTR_ERR(match) : -ENOENT;
635 } 674 }
636 m->u.kernel.match = match; 675 m->u.kernel.match = match;
637 676
638 ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), 677 ret = check_match(m, name, ipv6, hookmask, i);
639 name, hookmask, ipv6->proto,
640 ipv6->invflags & IP6T_INV_PROTO);
641 if (ret) 678 if (ret)
642 goto err; 679 goto err;
643 680
644 if (m->u.kernel.match->checkentry
645 && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
646 hookmask)) {
647 duprintf("ip_tables: check failed for `%s'.\n",
648 m->u.kernel.match->name);
649 ret = -EINVAL;
650 goto err;
651 }
652
653 (*i)++;
654 return 0; 681 return 0;
655err: 682err:
656 module_put(m->u.kernel.match->me); 683 module_put(m->u.kernel.match->me);
657 return ret; 684 return ret;
658} 685}
659 686
660static struct xt_target ip6t_standard_target; 687static int check_target(struct ip6t_entry *e, const char *name)
661
662static inline int
663check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
664 unsigned int *i)
665{ 688{
666 struct ip6t_entry_target *t; 689 struct ip6t_entry_target *t;
667 struct xt_target *target; 690 struct xt_target *target;
668 int ret; 691 int ret;
669 unsigned int j;
670 692
671 if (!ip6_checkentry(&e->ipv6)) { 693 t = ip6t_get_target(e);
672 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 694 target = t->u.kernel.target;
673 return -EINVAL; 695 ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
696 name, e->comefrom, e->ipv6.proto,
697 e->ipv6.invflags & IP6T_INV_PROTO);
698 if (!ret && t->u.kernel.target->checkentry
699 && !t->u.kernel.target->checkentry(name, e, target, t->data,
700 e->comefrom)) {
701 duprintf("ip_tables: check failed for `%s'.\n",
702 t->u.kernel.target->name);
703 ret = -EINVAL;
674 } 704 }
705 return ret;
706}
675 707
676 if (e->target_offset + sizeof(struct ip6t_entry_target) > 708static int
677 e->next_offset) 709find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
678 return -EINVAL; 710 unsigned int *i)
711{
712 struct ip6t_entry_target *t;
713 struct xt_target *target;
714 int ret;
715 unsigned int j;
716
717 ret = check_entry(e, name);
718 if (ret)
719 return ret;
679 720
680 j = 0; 721 j = 0;
681 ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j); 722 ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6,
723 e->comefrom, &j);
682 if (ret != 0) 724 if (ret != 0)
683 goto cleanup_matches; 725 goto cleanup_matches;
684 726
685 t = ip6t_get_target(e); 727 t = ip6t_get_target(e);
686 ret = -EINVAL;
687 if (e->target_offset + t->u.target_size > e->next_offset)
688 goto cleanup_matches;
689 target = try_then_request_module(xt_find_target(AF_INET6, 728 target = try_then_request_module(xt_find_target(AF_INET6,
690 t->u.user.name, 729 t->u.user.name,
691 t->u.user.revision), 730 t->u.user.revision),
692 "ip6t_%s", t->u.user.name); 731 "ip6t_%s", t->u.user.name);
693 if (IS_ERR(target) || !target) { 732 if (IS_ERR(target) || !target) {
694 duprintf("check_entry: `%s' not found\n", t->u.user.name); 733 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
695 ret = target ? PTR_ERR(target) : -ENOENT; 734 ret = target ? PTR_ERR(target) : -ENOENT;
696 goto cleanup_matches; 735 goto cleanup_matches;
697 } 736 }
698 t->u.kernel.target = target; 737 t->u.kernel.target = target;
699 738
700 ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), 739 ret = check_target(e, name);
701 name, e->comefrom, e->ipv6.proto,
702 e->ipv6.invflags & IP6T_INV_PROTO);
703 if (ret) 740 if (ret)
704 goto err; 741 goto err;
705 742
706 if (t->u.kernel.target->checkentry
707 && !t->u.kernel.target->checkentry(name, e, target, t->data,
708 e->comefrom)) {
709 duprintf("ip_tables: check failed for `%s'.\n",
710 t->u.kernel.target->name);
711 ret = -EINVAL;
712 goto err;
713 }
714
715 (*i)++; 743 (*i)++;
716 return 0; 744 return 0;
717 err: 745 err:
@@ -721,7 +749,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
721 return ret; 749 return ret;
722} 750}
723 751
724static inline int 752static int
725check_entry_size_and_hooks(struct ip6t_entry *e, 753check_entry_size_and_hooks(struct ip6t_entry *e,
726 struct xt_table_info *newinfo, 754 struct xt_table_info *newinfo,
727 unsigned char *base, 755 unsigned char *base,
@@ -746,7 +774,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
746 } 774 }
747 775
748 /* Check hooks & underflows */ 776 /* Check hooks & underflows */
749 for (h = 0; h < NF_IP6_NUMHOOKS; h++) { 777 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
750 if ((unsigned char *)e - base == hook_entries[h]) 778 if ((unsigned char *)e - base == hook_entries[h])
751 newinfo->hook_entry[h] = hook_entries[h]; 779 newinfo->hook_entry[h] = hook_entries[h];
752 if ((unsigned char *)e - base == underflows[h]) 780 if ((unsigned char *)e - base == underflows[h])
@@ -764,7 +792,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
764 return 0; 792 return 0;
765} 793}
766 794
767static inline int 795static int
768cleanup_entry(struct ip6t_entry *e, unsigned int *i) 796cleanup_entry(struct ip6t_entry *e, unsigned int *i)
769{ 797{
770 struct ip6t_entry_target *t; 798 struct ip6t_entry_target *t;
@@ -800,7 +828,7 @@ translate_table(const char *name,
800 newinfo->number = number; 828 newinfo->number = number;
801 829
802 /* Init all hooks to impossible value. */ 830 /* Init all hooks to impossible value. */
803 for (i = 0; i < NF_IP6_NUMHOOKS; i++) { 831 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
804 newinfo->hook_entry[i] = 0xFFFFFFFF; 832 newinfo->hook_entry[i] = 0xFFFFFFFF;
805 newinfo->underflow[i] = 0xFFFFFFFF; 833 newinfo->underflow[i] = 0xFFFFFFFF;
806 } 834 }
@@ -824,7 +852,7 @@ translate_table(const char *name,
824 } 852 }
825 853
826 /* Check hooks all assigned */ 854 /* Check hooks all assigned */
827 for (i = 0; i < NF_IP6_NUMHOOKS; i++) { 855 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
828 /* Only hooks which are valid */ 856 /* Only hooks which are valid */
829 if (!(valid_hooks & (1 << i))) 857 if (!(valid_hooks & (1 << i)))
830 continue; 858 continue;
@@ -846,7 +874,7 @@ translate_table(const char *name,
846 /* Finally, each sanity check must pass */ 874 /* Finally, each sanity check must pass */
847 i = 0; 875 i = 0;
848 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, 876 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
849 check_entry, name, size, &i); 877 find_check_entry, name, size, &i);
850 878
851 if (ret != 0) { 879 if (ret != 0) {
852 IP6T_ENTRY_ITERATE(entry0, newinfo->size, 880 IP6T_ENTRY_ITERATE(entry0, newinfo->size,
@@ -860,7 +888,7 @@ translate_table(const char *name,
860 memcpy(newinfo->entries[i], entry0, newinfo->size); 888 memcpy(newinfo->entries[i], entry0, newinfo->size);
861 } 889 }
862 890
863 return 0; 891 return ret;
864} 892}
865 893
866/* Gets counters. */ 894/* Gets counters. */
@@ -920,33 +948,49 @@ get_counters(const struct xt_table_info *t,
920 } 948 }
921} 949}
922 950
923static int 951static struct xt_counters *alloc_counters(struct xt_table *table)
924copy_entries_to_user(unsigned int total_size,
925 struct xt_table *table,
926 void __user *userptr)
927{ 952{
928 unsigned int off, num, countersize; 953 unsigned int countersize;
929 struct ip6t_entry *e;
930 struct xt_counters *counters; 954 struct xt_counters *counters;
931 struct xt_table_info *private = table->private; 955 struct xt_table_info *private = table->private;
932 int ret = 0;
933 void *loc_cpu_entry;
934 956
935 /* We need atomic snapshot of counters: rest doesn't change 957 /* We need atomic snapshot of counters: rest doesn't change
936 (other than comefrom, which userspace doesn't care 958 (other than comefrom, which userspace doesn't care
937 about). */ 959 about). */
938 countersize = sizeof(struct xt_counters) * private->number; 960 countersize = sizeof(struct xt_counters) * private->number;
939 counters = vmalloc(countersize); 961 counters = vmalloc_node(countersize, numa_node_id());
940 962
941 if (counters == NULL) 963 if (counters == NULL)
942 return -ENOMEM; 964 return ERR_PTR(-ENOMEM);
943 965
944 /* First, sum counters... */ 966 /* First, sum counters... */
945 write_lock_bh(&table->lock); 967 write_lock_bh(&table->lock);
946 get_counters(private, counters); 968 get_counters(private, counters);
947 write_unlock_bh(&table->lock); 969 write_unlock_bh(&table->lock);
948 970
949 /* choose the copy that is on ourc node/cpu */ 971 return counters;
972}
973
974static int
975copy_entries_to_user(unsigned int total_size,
976 struct xt_table *table,
977 void __user *userptr)
978{
979 unsigned int off, num;
980 struct ip6t_entry *e;
981 struct xt_counters *counters;
982 struct xt_table_info *private = table->private;
983 int ret = 0;
984 void *loc_cpu_entry;
985
986 counters = alloc_counters(table);
987 if (IS_ERR(counters))
988 return PTR_ERR(counters);
989
990 /* choose the copy that is on our node/cpu, ...
991 * This choice is lazy (because current thread is
992 * allowed to migrate to another cpu)
993 */
950 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 994 loc_cpu_entry = private->entries[raw_smp_processor_id()];
951 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 995 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
952 ret = -EFAULT; 996 ret = -EFAULT;
@@ -1001,23 +1045,167 @@ copy_entries_to_user(unsigned int total_size,
1001 return ret; 1045 return ret;
1002} 1046}
1003 1047
1048#ifdef CONFIG_COMPAT
1049static void compat_standard_from_user(void *dst, void *src)
1050{
1051 int v = *(compat_int_t *)src;
1052
1053 if (v > 0)
1054 v += xt_compat_calc_jump(AF_INET6, v);
1055 memcpy(dst, &v, sizeof(v));
1056}
1057
1058static int compat_standard_to_user(void __user *dst, void *src)
1059{
1060 compat_int_t cv = *(int *)src;
1061
1062 if (cv > 0)
1063 cv -= xt_compat_calc_jump(AF_INET6, cv);
1064 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1065}
1066
1067static inline int
1068compat_calc_match(struct ip6t_entry_match *m, int *size)
1069{
1070 *size += xt_compat_match_offset(m->u.kernel.match);
1071 return 0;
1072}
1073
1074static int compat_calc_entry(struct ip6t_entry *e,
1075 const struct xt_table_info *info,
1076 void *base, struct xt_table_info *newinfo)
1077{
1078 struct ip6t_entry_target *t;
1079 unsigned int entry_offset;
1080 int off, i, ret;
1081
1082 off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
1083 entry_offset = (void *)e - base;
1084 IP6T_MATCH_ITERATE(e, compat_calc_match, &off);
1085 t = ip6t_get_target(e);
1086 off += xt_compat_target_offset(t->u.kernel.target);
1087 newinfo->size -= off;
1088 ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
1089 if (ret)
1090 return ret;
1091
1092 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1093 if (info->hook_entry[i] &&
1094 (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
1095 newinfo->hook_entry[i] -= off;
1096 if (info->underflow[i] &&
1097 (e < (struct ip6t_entry *)(base + info->underflow[i])))
1098 newinfo->underflow[i] -= off;
1099 }
1100 return 0;
1101}
1102
1103static int compat_table_info(const struct xt_table_info *info,
1104 struct xt_table_info *newinfo)
1105{
1106 void *loc_cpu_entry;
1107
1108 if (!newinfo || !info)
1109 return -EINVAL;
1110
1111 /* we dont care about newinfo->entries[] */
1112 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1113 newinfo->initial_entries = 0;
1114 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1115 return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size,
1116 compat_calc_entry, info, loc_cpu_entry,
1117 newinfo);
1118}
1119#endif
1120
1121static int get_info(struct net *net, void __user *user, int *len, int compat)
1122{
1123 char name[IP6T_TABLE_MAXNAMELEN];
1124 struct xt_table *t;
1125 int ret;
1126
1127 if (*len != sizeof(struct ip6t_getinfo)) {
1128 duprintf("length %u != %zu\n", *len,
1129 sizeof(struct ip6t_getinfo));
1130 return -EINVAL;
1131 }
1132
1133 if (copy_from_user(name, user, sizeof(name)) != 0)
1134 return -EFAULT;
1135
1136 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1137#ifdef CONFIG_COMPAT
1138 if (compat)
1139 xt_compat_lock(AF_INET6);
1140#endif
1141 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
1142 "ip6table_%s", name);
1143 if (t && !IS_ERR(t)) {
1144 struct ip6t_getinfo info;
1145 struct xt_table_info *private = t->private;
1146
1147#ifdef CONFIG_COMPAT
1148 if (compat) {
1149 struct xt_table_info tmp;
1150 ret = compat_table_info(private, &tmp);
1151 xt_compat_flush_offsets(AF_INET6);
1152 private = &tmp;
1153 }
1154#endif
1155 info.valid_hooks = t->valid_hooks;
1156 memcpy(info.hook_entry, private->hook_entry,
1157 sizeof(info.hook_entry));
1158 memcpy(info.underflow, private->underflow,
1159 sizeof(info.underflow));
1160 info.num_entries = private->number;
1161 info.size = private->size;
1162 strcpy(info.name, name);
1163
1164 if (copy_to_user(user, &info, *len) != 0)
1165 ret = -EFAULT;
1166 else
1167 ret = 0;
1168
1169 xt_table_unlock(t);
1170 module_put(t->me);
1171 } else
1172 ret = t ? PTR_ERR(t) : -ENOENT;
1173#ifdef CONFIG_COMPAT
1174 if (compat)
1175 xt_compat_unlock(AF_INET6);
1176#endif
1177 return ret;
1178}
1179
1004static int 1180static int
1005get_entries(const struct ip6t_get_entries *entries, 1181get_entries(struct net *net, struct ip6t_get_entries __user *uptr, int *len)
1006 struct ip6t_get_entries __user *uptr)
1007{ 1182{
1008 int ret; 1183 int ret;
1184 struct ip6t_get_entries get;
1009 struct xt_table *t; 1185 struct xt_table *t;
1010 1186
1011 t = xt_find_table_lock(AF_INET6, entries->name); 1187 if (*len < sizeof(get)) {
1188 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1189 return -EINVAL;
1190 }
1191 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1192 return -EFAULT;
1193 if (*len != sizeof(struct ip6t_get_entries) + get.size) {
1194 duprintf("get_entries: %u != %zu\n",
1195 *len, sizeof(get) + get.size);
1196 return -EINVAL;
1197 }
1198
1199 t = xt_find_table_lock(net, AF_INET6, get.name);
1012 if (t && !IS_ERR(t)) { 1200 if (t && !IS_ERR(t)) {
1013 struct xt_table_info *private = t->private; 1201 struct xt_table_info *private = t->private;
1014 duprintf("t->private->number = %u\n", private->number); 1202 duprintf("t->private->number = %u\n", private->number);
1015 if (entries->size == private->size) 1203 if (get.size == private->size)
1016 ret = copy_entries_to_user(private->size, 1204 ret = copy_entries_to_user(private->size,
1017 t, uptr->entrytable); 1205 t, uptr->entrytable);
1018 else { 1206 else {
1019 duprintf("get_entries: I've got %u not %u!\n", 1207 duprintf("get_entries: I've got %u not %u!\n",
1020 private->size, entries->size); 1208 private->size, get.size);
1021 ret = -EINVAL; 1209 ret = -EINVAL;
1022 } 1210 }
1023 module_put(t->me); 1211 module_put(t->me);
@@ -1029,67 +1217,40 @@ get_entries(const struct ip6t_get_entries *entries,
1029} 1217}
1030 1218
1031static int 1219static int
1032do_replace(void __user *user, unsigned int len) 1220__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1221 struct xt_table_info *newinfo, unsigned int num_counters,
1222 void __user *counters_ptr)
1033{ 1223{
1034 int ret; 1224 int ret;
1035 struct ip6t_replace tmp;
1036 struct xt_table *t; 1225 struct xt_table *t;
1037 struct xt_table_info *newinfo, *oldinfo; 1226 struct xt_table_info *oldinfo;
1038 struct xt_counters *counters; 1227 struct xt_counters *counters;
1039 void *loc_cpu_entry, *loc_cpu_old_entry; 1228 void *loc_cpu_old_entry;
1040
1041 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1042 return -EFAULT;
1043 1229
1044 /* overflow check */ 1230 ret = 0;
1045 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - 1231 counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
1046 SMP_CACHE_BYTES) 1232 numa_node_id());
1047 return -ENOMEM;
1048 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1049 return -ENOMEM;
1050
1051 newinfo = xt_alloc_table_info(tmp.size);
1052 if (!newinfo)
1053 return -ENOMEM;
1054
1055 /* choose the copy that is on our node/cpu */
1056 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1057 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1058 tmp.size) != 0) {
1059 ret = -EFAULT;
1060 goto free_newinfo;
1061 }
1062
1063 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
1064 if (!counters) { 1233 if (!counters) {
1065 ret = -ENOMEM; 1234 ret = -ENOMEM;
1066 goto free_newinfo; 1235 goto out;
1067 } 1236 }
1068 1237
1069 ret = translate_table(tmp.name, tmp.valid_hooks, 1238 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
1070 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, 1239 "ip6table_%s", name);
1071 tmp.hook_entry, tmp.underflow);
1072 if (ret != 0)
1073 goto free_newinfo_counters;
1074
1075 duprintf("ip_tables: Translated table\n");
1076
1077 t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
1078 "ip6table_%s", tmp.name);
1079 if (!t || IS_ERR(t)) { 1240 if (!t || IS_ERR(t)) {
1080 ret = t ? PTR_ERR(t) : -ENOENT; 1241 ret = t ? PTR_ERR(t) : -ENOENT;
1081 goto free_newinfo_counters_untrans; 1242 goto free_newinfo_counters_untrans;
1082 } 1243 }
1083 1244
1084 /* You lied! */ 1245 /* You lied! */
1085 if (tmp.valid_hooks != t->valid_hooks) { 1246 if (valid_hooks != t->valid_hooks) {
1086 duprintf("Valid hook crap: %08X vs %08X\n", 1247 duprintf("Valid hook crap: %08X vs %08X\n",
1087 tmp.valid_hooks, t->valid_hooks); 1248 valid_hooks, t->valid_hooks);
1088 ret = -EINVAL; 1249 ret = -EINVAL;
1089 goto put_module; 1250 goto put_module;
1090 } 1251 }
1091 1252
1092 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); 1253 oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1093 if (!oldinfo) 1254 if (!oldinfo)
1094 goto put_module; 1255 goto put_module;
1095 1256
@@ -1107,10 +1268,11 @@ do_replace(void __user *user, unsigned int len)
1107 get_counters(oldinfo, counters); 1268 get_counters(oldinfo, counters);
1108 /* Decrease module usage counts and free resource */ 1269 /* Decrease module usage counts and free resource */
1109 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1270 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1110 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); 1271 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1272 NULL);
1111 xt_free_table_info(oldinfo); 1273 xt_free_table_info(oldinfo);
1112 if (copy_to_user(tmp.counters, counters, 1274 if (copy_to_user(counters_ptr, counters,
1113 sizeof(struct xt_counters) * tmp.num_counters) != 0) 1275 sizeof(struct xt_counters) * num_counters) != 0)
1114 ret = -EFAULT; 1276 ret = -EFAULT;
1115 vfree(counters); 1277 vfree(counters);
1116 xt_table_unlock(t); 1278 xt_table_unlock(t);
@@ -1120,9 +1282,54 @@ do_replace(void __user *user, unsigned int len)
1120 module_put(t->me); 1282 module_put(t->me);
1121 xt_table_unlock(t); 1283 xt_table_unlock(t);
1122 free_newinfo_counters_untrans: 1284 free_newinfo_counters_untrans:
1123 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1124 free_newinfo_counters:
1125 vfree(counters); 1285 vfree(counters);
1286 out:
1287 return ret;
1288}
1289
1290static int
1291do_replace(struct net *net, void __user *user, unsigned int len)
1292{
1293 int ret;
1294 struct ip6t_replace tmp;
1295 struct xt_table_info *newinfo;
1296 void *loc_cpu_entry;
1297
1298 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1299 return -EFAULT;
1300
1301 /* overflow check */
1302 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1303 return -ENOMEM;
1304
1305 newinfo = xt_alloc_table_info(tmp.size);
1306 if (!newinfo)
1307 return -ENOMEM;
1308
1309 /* choose the copy that is on our node/cpu */
1310 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1311 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1312 tmp.size) != 0) {
1313 ret = -EFAULT;
1314 goto free_newinfo;
1315 }
1316
1317 ret = translate_table(tmp.name, tmp.valid_hooks,
1318 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1319 tmp.hook_entry, tmp.underflow);
1320 if (ret != 0)
1321 goto free_newinfo;
1322
1323 duprintf("ip_tables: Translated table\n");
1324
1325 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1326 tmp.num_counters, tmp.counters);
1327 if (ret)
1328 goto free_newinfo_untrans;
1329 return 0;
1330
1331 free_newinfo_untrans:
1332 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1126 free_newinfo: 1333 free_newinfo:
1127 xt_free_table_info(newinfo); 1334 xt_free_table_info(newinfo);
1128 return ret; 1335 return ret;
@@ -1151,31 +1358,60 @@ add_counter_to_entry(struct ip6t_entry *e,
1151} 1358}
1152 1359
1153static int 1360static int
1154do_add_counters(void __user *user, unsigned int len) 1361do_add_counters(struct net *net, void __user *user, unsigned int len,
1362 int compat)
1155{ 1363{
1156 unsigned int i; 1364 unsigned int i;
1157 struct xt_counters_info tmp, *paddc; 1365 struct xt_counters_info tmp;
1158 struct xt_table_info *private; 1366 struct xt_counters *paddc;
1367 unsigned int num_counters;
1368 char *name;
1369 int size;
1370 void *ptmp;
1159 struct xt_table *t; 1371 struct xt_table *t;
1372 struct xt_table_info *private;
1160 int ret = 0; 1373 int ret = 0;
1161 void *loc_cpu_entry; 1374 void *loc_cpu_entry;
1375#ifdef CONFIG_COMPAT
1376 struct compat_xt_counters_info compat_tmp;
1162 1377
1163 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1378 if (compat) {
1379 ptmp = &compat_tmp;
1380 size = sizeof(struct compat_xt_counters_info);
1381 } else
1382#endif
1383 {
1384 ptmp = &tmp;
1385 size = sizeof(struct xt_counters_info);
1386 }
1387
1388 if (copy_from_user(ptmp, user, size) != 0)
1164 return -EFAULT; 1389 return -EFAULT;
1165 1390
1166 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) 1391#ifdef CONFIG_COMPAT
1392 if (compat) {
1393 num_counters = compat_tmp.num_counters;
1394 name = compat_tmp.name;
1395 } else
1396#endif
1397 {
1398 num_counters = tmp.num_counters;
1399 name = tmp.name;
1400 }
1401
1402 if (len != size + num_counters * sizeof(struct xt_counters))
1167 return -EINVAL; 1403 return -EINVAL;
1168 1404
1169 paddc = vmalloc(len); 1405 paddc = vmalloc_node(len - size, numa_node_id());
1170 if (!paddc) 1406 if (!paddc)
1171 return -ENOMEM; 1407 return -ENOMEM;
1172 1408
1173 if (copy_from_user(paddc, user, len) != 0) { 1409 if (copy_from_user(paddc, user + size, len - size) != 0) {
1174 ret = -EFAULT; 1410 ret = -EFAULT;
1175 goto free; 1411 goto free;
1176 } 1412 }
1177 1413
1178 t = xt_find_table_lock(AF_INET6, tmp.name); 1414 t = xt_find_table_lock(net, AF_INET6, name);
1179 if (!t || IS_ERR(t)) { 1415 if (!t || IS_ERR(t)) {
1180 ret = t ? PTR_ERR(t) : -ENOENT; 1416 ret = t ? PTR_ERR(t) : -ENOENT;
1181 goto free; 1417 goto free;
@@ -1183,18 +1419,18 @@ do_add_counters(void __user *user, unsigned int len)
1183 1419
1184 write_lock_bh(&t->lock); 1420 write_lock_bh(&t->lock);
1185 private = t->private; 1421 private = t->private;
1186 if (private->number != tmp.num_counters) { 1422 if (private->number != num_counters) {
1187 ret = -EINVAL; 1423 ret = -EINVAL;
1188 goto unlock_up_free; 1424 goto unlock_up_free;
1189 } 1425 }
1190 1426
1191 i = 0; 1427 i = 0;
1192 /* Choose the copy that is on our node */ 1428 /* Choose the copy that is on our node */
1193 loc_cpu_entry = private->entries[smp_processor_id()]; 1429 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1194 IP6T_ENTRY_ITERATE(loc_cpu_entry, 1430 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1195 private->size, 1431 private->size,
1196 add_counter_to_entry, 1432 add_counter_to_entry,
1197 paddc->counters, 1433 paddc,
1198 &i); 1434 &i);
1199 unlock_up_free: 1435 unlock_up_free:
1200 write_unlock_bh(&t->lock); 1436 write_unlock_bh(&t->lock);
@@ -1206,8 +1442,435 @@ do_add_counters(void __user *user, unsigned int len)
1206 return ret; 1442 return ret;
1207} 1443}
1208 1444
1445#ifdef CONFIG_COMPAT
1446struct compat_ip6t_replace {
1447 char name[IP6T_TABLE_MAXNAMELEN];
1448 u32 valid_hooks;
1449 u32 num_entries;
1450 u32 size;
1451 u32 hook_entry[NF_INET_NUMHOOKS];
1452 u32 underflow[NF_INET_NUMHOOKS];
1453 u32 num_counters;
1454 compat_uptr_t counters; /* struct ip6t_counters * */
1455 struct compat_ip6t_entry entries[0];
1456};
1457
1209static int 1458static int
1210do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1459compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1460 unsigned int *size, struct xt_counters *counters,
1461 unsigned int *i)
1462{
1463 struct ip6t_entry_target *t;
1464 struct compat_ip6t_entry __user *ce;
1465 u_int16_t target_offset, next_offset;
1466 compat_uint_t origsize;
1467 int ret;
1468
1469 ret = -EFAULT;
1470 origsize = *size;
1471 ce = (struct compat_ip6t_entry __user *)*dstptr;
1472 if (copy_to_user(ce, e, sizeof(struct ip6t_entry)))
1473 goto out;
1474
1475 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1476 goto out;
1477
1478 *dstptr += sizeof(struct compat_ip6t_entry);
1479 *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
1480
1481 ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1482 target_offset = e->target_offset - (origsize - *size);
1483 if (ret)
1484 goto out;
1485 t = ip6t_get_target(e);
1486 ret = xt_compat_target_to_user(t, dstptr, size);
1487 if (ret)
1488 goto out;
1489 ret = -EFAULT;
1490 next_offset = e->next_offset - (origsize - *size);
1491 if (put_user(target_offset, &ce->target_offset))
1492 goto out;
1493 if (put_user(next_offset, &ce->next_offset))
1494 goto out;
1495
1496 (*i)++;
1497 return 0;
1498out:
1499 return ret;
1500}
1501
1502static int
1503compat_find_calc_match(struct ip6t_entry_match *m,
1504 const char *name,
1505 const struct ip6t_ip6 *ipv6,
1506 unsigned int hookmask,
1507 int *size, unsigned int *i)
1508{
1509 struct xt_match *match;
1510
1511 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
1512 m->u.user.revision),
1513 "ip6t_%s", m->u.user.name);
1514 if (IS_ERR(match) || !match) {
1515 duprintf("compat_check_calc_match: `%s' not found\n",
1516 m->u.user.name);
1517 return match ? PTR_ERR(match) : -ENOENT;
1518 }
1519 m->u.kernel.match = match;
1520 *size += xt_compat_match_offset(match);
1521
1522 (*i)++;
1523 return 0;
1524}
1525
1526static int
1527compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
1528{
1529 if (i && (*i)-- == 0)
1530 return 1;
1531
1532 module_put(m->u.kernel.match->me);
1533 return 0;
1534}
1535
1536static int
1537compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i)
1538{
1539 struct ip6t_entry_target *t;
1540
1541 if (i && (*i)-- == 0)
1542 return 1;
1543
1544 /* Cleanup all matches */
1545 COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
1546 t = compat_ip6t_get_target(e);
1547 module_put(t->u.kernel.target->me);
1548 return 0;
1549}
1550
/*
 * Validate one rule in 32-bit (compat) layout and work out how much
 * larger its native translation will be.
 *
 * On success:
 *  - the native-vs-compat size delta for this entry is added to *size,
 *  - an offset-translation record is stored via xt_compat_add_offset(),
 *  - hook entry / underflow positions that land exactly on this entry
 *    are copied into newinfo,
 *  - the entry's counters and comefrom field are cleared,
 *  - *i (the count of validated entries) is incremented.
 * On failure all match modules referenced so far (and, once looked up,
 * the target module) are released again.
 *
 * Returns 0 or a negative errno.
 */
static int
check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
				  struct xt_table_info *newinfo,
				  unsigned int *size,
				  unsigned char *base,
				  unsigned char *limit,
				  unsigned int *hook_entries,
				  unsigned int *underflows,
				  unsigned int *i,
				  const char *name)
{
	struct ip6t_entry_target *t;
	struct xt_target *target;
	unsigned int entry_offset;
	unsigned int j;	/* number of matches whose modules we hold refs on */
	int ret, off, h;

	duprintf("check_compat_entry_size_and_hooks %p\n", e);
	/* The entry must be properly aligned and leave room for at least
	 * the fixed-size header before the blob's end. */
	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0
	    || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
		duprintf("Bad offset %p, limit = %p\n", e, limit);
		return -EINVAL;
	}

	/* next_offset must cover at least the entry header plus a
	 * (compat) target header. */
	if (e->next_offset < sizeof(struct compat_ip6t_entry) +
			     sizeof(struct compat_xt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* For purposes of check_entry casting the compat entry is fine */
	ret = check_entry((struct ip6t_entry *)e, name);
	if (ret)
		return ret;

	/* off accumulates how much bigger the native form is: start with
	 * the header-size difference, then add each match's and the
	 * target's compat offset below. */
	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
	entry_offset = (void *)e - (void *)base;
	j = 0;
	/* NOTE(review): e->comefrom appears to carry the hook mask from
	 * userspace at this stage — it is cleared further down. */
	ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name,
					&e->ipv6, e->comefrom, &off, &j);
	if (ret != 0)
		goto release_matches;

	/* Look up the target, auto-loading its module if necessary. */
	t = compat_ip6t_get_target(e);
	target = try_then_request_module(xt_find_target(AF_INET6,
							t->u.user.name,
							t->u.user.revision),
					 "ip6t_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
			 t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto release_matches;
	}
	t->u.kernel.target = target;

	off += xt_compat_target_offset(target);
	*size += off;
	/* Remember compat-offset -> size-delta so user offsets can be
	 * translated later; flushed by the caller on any error. */
	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
	if (ret)
		goto out;

	/* Check hooks & underflows */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* Clear counters and comefrom */
	memset(&e->counters, 0, sizeof(e->counters));
	e->comefrom = 0;

	(*i)++;
	return 0;

out:
	module_put(t->u.kernel.target->me);
release_matches:
	IP6T_MATCH_ITERATE(e, compat_release_match, &j);
	return ret;
}
1635
/*
 * Convert one already-validated rule from compat (32-bit) layout into
 * native layout at *dstptr, advancing *dstptr past the converted entry.
 *
 * *size is grown by the per-entry size delta so that target_offset /
 * next_offset can be rebased, and any hook entry / underflow recorded in
 * newinfo that lies beyond this entry is shifted by the same delta.
 *
 * Must run between xt_compat_lock()/xt_compat_unlock() (the match/target
 * from_user helpers rely on the compat offset table).
 */
static int
compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
			    unsigned int *size, const char *name,
			    struct xt_table_info *newinfo, unsigned char *base)
{
	struct ip6t_entry_target *t;
	struct xt_target *target;
	struct ip6t_entry *de;	/* destination (native-layout) entry */
	unsigned int origsize;
	int ret, h;

	ret = 0;
	origsize = *size;
	de = (struct ip6t_entry *)*dstptr;
	/* Copy the fixed header; the offset fields copied here are stale
	 * and are rebased below once the real deltas are known.
	 * NOTE(review): this copies sizeof(struct ip6t_entry) bytes from a
	 * smaller compat entry, i.e. it reads into the match data that
	 * follows the compat header — presumably intentional since the
	 * surplus fields are overwritten; confirm against the IPv4 twin. */
	memcpy(de, e, sizeof(struct ip6t_entry));
	memcpy(&de->counters, &e->counters, sizeof(e->counters));

	*dstptr += sizeof(struct ip6t_entry);
	*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);

	/* Convert each match in turn; these advance *dstptr/*size too. */
	ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user,
					dstptr, size);
	if (ret)
		return ret;
	/* (origsize - *size) is negative the growth so far: rebase the
	 * target offset by how much this entry has grown. */
	de->target_offset = e->target_offset - (origsize - *size);
	t = compat_ip6t_get_target(e);
	target = t->u.kernel.target;
	xt_compat_target_from_user(t, dstptr, size);

	de->next_offset = e->next_offset - (origsize - *size);
	/* Shift any hook/underflow offsets that point past this entry. */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if ((unsigned char *)de - base < newinfo->hook_entry[h])
			newinfo->hook_entry[h] -= origsize - *size;
		if ((unsigned char *)de - base < newinfo->underflow[h])
			newinfo->underflow[h] -= origsize - *size;
	}
	return ret;
}
1674
1675static int compat_check_entry(struct ip6t_entry *e, const char *name,
1676 unsigned int *i)
1677{
1678 unsigned int j;
1679 int ret;
1680
1681 j = 0;
1682 ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6,
1683 e->comefrom, &j);
1684 if (ret)
1685 goto cleanup_matches;
1686
1687 ret = check_target(e, name);
1688 if (ret)
1689 goto cleanup_matches;
1690
1691 (*i)++;
1692 return 0;
1693
1694 cleanup_matches:
1695 IP6T_MATCH_ITERATE(e, cleanup_match, &j);
1696 return ret;
1697}
1698
/*
 * Translate an entire ruleset blob from compat (32-bit userland) layout
 * into native layout.
 *
 * Two passes under the AF_INET6 compat lock:
 *  1. Walk the compat entries, validating sizes/hooks and computing the
 *     native size (check_compat_entry_size_and_hooks).
 *  2. Allocate a native xt_table_info of that size and copy each entry
 *     across, rebasing offsets (compat_copy_entry_from_user).
 * Afterwards the chain graph is verified (mark_source_chains), each
 * translated entry gets its final checkentry pass, and the result is
 * replicated to every possible CPU.
 *
 * On success *pinfo/*pentry0 are replaced with the native table (the old
 * info is freed) and 0 is returned; on failure everything allocated or
 * referenced here is released and a negative errno is returned.
 */
static int
translate_compat_table(const char *name,
		       unsigned int valid_hooks,
		       struct xt_table_info **pinfo,
		       void **pentry0,
		       unsigned int total_size,
		       unsigned int number,
		       unsigned int *hook_entries,
		       unsigned int *underflows)
{
	unsigned int i, j;
	struct xt_table_info *newinfo, *info;
	void *pos, *entry0, *entry1;	/* entry0: compat blob, entry1: native */
	unsigned int size;
	int ret;

	info = *pinfo;
	entry0 = *pentry0;
	size = total_size;
	info->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		info->hook_entry[i] = 0xFFFFFFFF;
		info->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_compat_table: size %u\n", info->size);
	j = 0;	/* counts entries that passed pass 1 (for cleanup) */
	xt_compat_lock(AF_INET6);
	/* Walk through entries, checking offsets. */
	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
					check_compat_entry_size_and_hooks,
					info, &size, entry0,
					entry0 + total_size,
					hook_entries, underflows, &j, name);
	if (ret != 0)
		goto out_unlock;

	ret = -EINVAL;
	if (j != number) {
		duprintf("translate_compat_table: %u not %u entries\n",
			 j, number);
		goto out_unlock;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (info->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			goto out_unlock;
		}
		if (info->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			goto out_unlock;
		}
	}

	/* Pass 2: build the native table (size was grown by pass 1). */
	ret = -ENOMEM;
	newinfo = xt_alloc_table_info(size);
	if (!newinfo)
		goto out_unlock;

	newinfo->number = number;
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = info->hook_entry[i];
		newinfo->underflow[i] = info->underflow[i];
	}
	entry1 = newinfo->entries[raw_smp_processor_id()];
	pos = entry1;
	size = total_size;
	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
					compat_copy_entry_from_user,
					&pos, &size, name, newinfo, entry1);
	/* Offset table no longer needed once all entries are copied. */
	xt_compat_flush_offsets(AF_INET6);
	xt_compat_unlock(AF_INET6);
	if (ret)
		goto free_newinfo;

	ret = -ELOOP;
	if (!mark_source_chains(newinfo, valid_hooks, entry1))
		goto free_newinfo;

	/* Run the real checkentry hooks on the translated entries. */
	i = 0;
	ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
				 name, &i);
	if (ret) {
		/* First i native entries were checked and are cleaned up
		 * via cleanup_entry; the remaining j-i compat entries only
		 * hold module refs, dropped via compat_release_entry. */
		j -= i;
		COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
						   compat_release_entry, &j);
		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
		xt_free_table_info(newinfo);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_possible_cpu(i)
		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
			memcpy(newinfo->entries[i], entry1, newinfo->size);

	*pinfo = newinfo;
	*pentry0 = entry1;
	xt_free_table_info(info);
	return 0;

free_newinfo:
	xt_free_table_info(newinfo);
out:
	/* Drop the module refs taken during pass 1. */
	COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
	return ret;
out_unlock:
	xt_compat_flush_offsets(AF_INET6);
	xt_compat_unlock(AF_INET6);
	goto out;
}
1819
1820static int
1821compat_do_replace(struct net *net, void __user *user, unsigned int len)
1822{
1823 int ret;
1824 struct compat_ip6t_replace tmp;
1825 struct xt_table_info *newinfo;
1826 void *loc_cpu_entry;
1827
1828 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1829 return -EFAULT;
1830
1831 /* overflow check */
1832 if (tmp.size >= INT_MAX / num_possible_cpus())
1833 return -ENOMEM;
1834 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1835 return -ENOMEM;
1836
1837 newinfo = xt_alloc_table_info(tmp.size);
1838 if (!newinfo)
1839 return -ENOMEM;
1840
1841 /* choose the copy that is on our node/cpu */
1842 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1843 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1844 tmp.size) != 0) {
1845 ret = -EFAULT;
1846 goto free_newinfo;
1847 }
1848
1849 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1850 &newinfo, &loc_cpu_entry, tmp.size,
1851 tmp.num_entries, tmp.hook_entry,
1852 tmp.underflow);
1853 if (ret != 0)
1854 goto free_newinfo;
1855
1856 duprintf("compat_do_replace: Translated table\n");
1857
1858 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1859 tmp.num_counters, compat_ptr(tmp.counters));
1860 if (ret)
1861 goto free_newinfo_untrans;
1862 return 0;
1863
1864 free_newinfo_untrans:
1865 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1866 free_newinfo:
1867 xt_free_table_info(newinfo);
1868 return ret;
1869}
1870
1871static int
1872compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1873 unsigned int len)
1211{ 1874{
1212 int ret; 1875 int ret;
1213 1876
@@ -1216,11 +1879,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1216 1879
1217 switch (cmd) { 1880 switch (cmd) {
1218 case IP6T_SO_SET_REPLACE: 1881 case IP6T_SO_SET_REPLACE:
1219 ret = do_replace(user, len); 1882 ret = compat_do_replace(sk->sk_net, user, len);
1220 break; 1883 break;
1221 1884
1222 case IP6T_SO_SET_ADD_COUNTERS: 1885 case IP6T_SO_SET_ADD_COUNTERS:
1223 ret = do_add_counters(user, len); 1886 ret = do_add_counters(sk->sk_net, user, len, 1);
1224 break; 1887 break;
1225 1888
1226 default: 1889 default:
@@ -1231,75 +1894,156 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1231 return ret; 1894 return ret;
1232} 1895}
1233 1896
1897struct compat_ip6t_get_entries {
1898 char name[IP6T_TABLE_MAXNAMELEN];
1899 compat_uint_t size;
1900 struct compat_ip6t_entry entrytable[0];
1901};
1902
1234static int 1903static int
1235do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 1904compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1905 void __user *userptr)
1906{
1907 struct xt_counters *counters;
1908 struct xt_table_info *private = table->private;
1909 void __user *pos;
1910 unsigned int size;
1911 int ret = 0;
1912 void *loc_cpu_entry;
1913 unsigned int i = 0;
1914
1915 counters = alloc_counters(table);
1916 if (IS_ERR(counters))
1917 return PTR_ERR(counters);
1918
1919 /* choose the copy that is on our node/cpu, ...
1920 * This choice is lazy (because current thread is
1921 * allowed to migrate to another cpu)
1922 */
1923 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1924 pos = userptr;
1925 size = total_size;
1926 ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size,
1927 compat_copy_entry_to_user,
1928 &pos, &size, counters, &i);
1929
1930 vfree(counters);
1931 return ret;
1932}
1933
1934static int
1935compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1936 int *len)
1236{ 1937{
1237 int ret; 1938 int ret;
1939 struct compat_ip6t_get_entries get;
1940 struct xt_table *t;
1238 1941
1239 if (!capable(CAP_NET_ADMIN)) 1942 if (*len < sizeof(get)) {
1240 return -EPERM; 1943 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1944 return -EINVAL;
1945 }
1241 1946
1242 switch (cmd) { 1947 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1243 case IP6T_SO_GET_INFO: { 1948 return -EFAULT;
1244 char name[IP6T_TABLE_MAXNAMELEN];
1245 struct xt_table *t;
1246 1949
1247 if (*len != sizeof(struct ip6t_getinfo)) { 1950 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
1248 duprintf("length %u != %u\n", *len, 1951 duprintf("compat_get_entries: %u != %zu\n",
1249 sizeof(struct ip6t_getinfo)); 1952 *len, sizeof(get) + get.size);
1953 return -EINVAL;
1954 }
1955
1956 xt_compat_lock(AF_INET6);
1957 t = xt_find_table_lock(net, AF_INET6, get.name);
1958 if (t && !IS_ERR(t)) {
1959 struct xt_table_info *private = t->private;
1960 struct xt_table_info info;
1961 duprintf("t->private->number = %u\n", private->number);
1962 ret = compat_table_info(private, &info);
1963 if (!ret && get.size == info.size) {
1964 ret = compat_copy_entries_to_user(private->size,
1965 t, uptr->entrytable);
1966 } else if (!ret) {
1967 duprintf("compat_get_entries: I've got %u not %u!\n",
1968 private->size, get.size);
1250 ret = -EINVAL; 1969 ret = -EINVAL;
1251 break;
1252 } 1970 }
1971 xt_compat_flush_offsets(AF_INET6);
1972 module_put(t->me);
1973 xt_table_unlock(t);
1974 } else
1975 ret = t ? PTR_ERR(t) : -ENOENT;
1253 1976
1254 if (copy_from_user(name, user, sizeof(name)) != 0) { 1977 xt_compat_unlock(AF_INET6);
1255 ret = -EFAULT; 1978 return ret;
1256 break; 1979}
1257 } 1980
1258 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1981static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
1259 1982
1260 t = try_then_request_module(xt_find_table_lock(AF_INET6, name), 1983static int
1261 "ip6table_%s", name); 1984compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1262 if (t && !IS_ERR(t)) { 1985{
1263 struct ip6t_getinfo info; 1986 int ret;
1264 struct xt_table_info *private = t->private; 1987
1265 1988 if (!capable(CAP_NET_ADMIN))
1266 info.valid_hooks = t->valid_hooks; 1989 return -EPERM;
1267 memcpy(info.hook_entry, private->hook_entry, 1990
1268 sizeof(info.hook_entry)); 1991 switch (cmd) {
1269 memcpy(info.underflow, private->underflow, 1992 case IP6T_SO_GET_INFO:
1270 sizeof(info.underflow)); 1993 ret = get_info(sk->sk_net, user, len, 1);
1271 info.num_entries = private->number; 1994 break;
1272 info.size = private->size; 1995 case IP6T_SO_GET_ENTRIES:
1273 memcpy(info.name, name, sizeof(info.name)); 1996 ret = compat_get_entries(sk->sk_net, user, len);
1274 1997 break;
1275 if (copy_to_user(user, &info, *len) != 0) 1998 default:
1276 ret = -EFAULT; 1999 ret = do_ip6t_get_ctl(sk, cmd, user, len);
1277 else
1278 ret = 0;
1279 xt_table_unlock(t);
1280 module_put(t->me);
1281 } else
1282 ret = t ? PTR_ERR(t) : -ENOENT;
1283 } 2000 }
1284 break; 2001 return ret;
2002}
2003#endif
1285 2004
1286 case IP6T_SO_GET_ENTRIES: { 2005static int
1287 struct ip6t_get_entries get; 2006do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2007{
2008 int ret;
1288 2009
1289 if (*len < sizeof(get)) { 2010 if (!capable(CAP_NET_ADMIN))
1290 duprintf("get_entries: %u < %u\n", *len, sizeof(get)); 2011 return -EPERM;
1291 ret = -EINVAL; 2012
1292 } else if (copy_from_user(&get, user, sizeof(get)) != 0) { 2013 switch (cmd) {
1293 ret = -EFAULT; 2014 case IP6T_SO_SET_REPLACE:
1294 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) { 2015 ret = do_replace(sk->sk_net, user, len);
1295 duprintf("get_entries: %u != %u\n", *len, 2016 break;
1296 sizeof(struct ip6t_get_entries) + get.size); 2017
1297 ret = -EINVAL; 2018 case IP6T_SO_SET_ADD_COUNTERS:
1298 } else 2019 ret = do_add_counters(sk->sk_net, user, len, 0);
1299 ret = get_entries(&get, user);
1300 break; 2020 break;
2021
2022 default:
2023 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
2024 ret = -EINVAL;
1301 } 2025 }
1302 2026
2027 return ret;
2028}
2029
2030static int
2031do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2032{
2033 int ret;
2034
2035 if (!capable(CAP_NET_ADMIN))
2036 return -EPERM;
2037
2038 switch (cmd) {
2039 case IP6T_SO_GET_INFO:
2040 ret = get_info(sk->sk_net, user, len, 0);
2041 break;
2042
2043 case IP6T_SO_GET_ENTRIES:
2044 ret = get_entries(sk->sk_net, user, len);
2045 break;
2046
1303 case IP6T_SO_GET_REVISION_MATCH: 2047 case IP6T_SO_GET_REVISION_MATCH:
1304 case IP6T_SO_GET_REVISION_TARGET: { 2048 case IP6T_SO_GET_REVISION_TARGET: {
1305 struct ip6t_get_revision rev; 2049 struct ip6t_get_revision rev;
@@ -1334,20 +2078,23 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1334 return ret; 2078 return ret;
1335} 2079}
1336 2080
1337int ip6t_register_table(struct xt_table *table, 2081struct xt_table *ip6t_register_table(struct net *net, struct xt_table *table,
1338 const struct ip6t_replace *repl) 2082 const struct ip6t_replace *repl)
1339{ 2083{
1340 int ret; 2084 int ret;
1341 struct xt_table_info *newinfo; 2085 struct xt_table_info *newinfo;
1342 static struct xt_table_info bootstrap 2086 struct xt_table_info bootstrap
1343 = { 0, 0, 0, { 0 }, { 0 }, { } }; 2087 = { 0, 0, 0, { 0 }, { 0 }, { } };
1344 void *loc_cpu_entry; 2088 void *loc_cpu_entry;
2089 struct xt_table *new_table;
1345 2090
1346 newinfo = xt_alloc_table_info(repl->size); 2091 newinfo = xt_alloc_table_info(repl->size);
1347 if (!newinfo) 2092 if (!newinfo) {
1348 return -ENOMEM; 2093 ret = -ENOMEM;
2094 goto out;
2095 }
1349 2096
1350 /* choose the copy on our node/cpu */ 2097 /* choose the copy on our node/cpu, but dont care about preemption */
1351 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2098 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1352 memcpy(loc_cpu_entry, repl->entries, repl->size); 2099 memcpy(loc_cpu_entry, repl->entries, repl->size);
1353 2100
@@ -1356,30 +2103,35 @@ int ip6t_register_table(struct xt_table *table,
1356 repl->num_entries, 2103 repl->num_entries,
1357 repl->hook_entry, 2104 repl->hook_entry,
1358 repl->underflow); 2105 repl->underflow);
1359 if (ret != 0) { 2106 if (ret != 0)
1360 xt_free_table_info(newinfo); 2107 goto out_free;
1361 return ret;
1362 }
1363 2108
1364 ret = xt_register_table(table, &bootstrap, newinfo); 2109 new_table = xt_register_table(net, table, &bootstrap, newinfo);
1365 if (ret != 0) { 2110 if (IS_ERR(new_table)) {
1366 xt_free_table_info(newinfo); 2111 ret = PTR_ERR(new_table);
1367 return ret; 2112 goto out_free;
1368 } 2113 }
2114 return new_table;
1369 2115
1370 return 0; 2116out_free:
2117 xt_free_table_info(newinfo);
2118out:
2119 return ERR_PTR(ret);
1371} 2120}
1372 2121
1373void ip6t_unregister_table(struct xt_table *table) 2122void ip6t_unregister_table(struct xt_table *table)
1374{ 2123{
1375 struct xt_table_info *private; 2124 struct xt_table_info *private;
1376 void *loc_cpu_entry; 2125 void *loc_cpu_entry;
2126 struct module *table_owner = table->me;
1377 2127
1378 private = xt_unregister_table(table); 2128 private = xt_unregister_table(table);
1379 2129
1380 /* Decrease module usage counts and free resources */ 2130 /* Decrease module usage counts and free resources */
1381 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2131 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1382 IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); 2132 IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2133 if (private->number > private->initial_entries)
2134 module_put(table_owner);
1383 xt_free_table_info(private); 2135 xt_free_table_info(private);
1384} 2136}
1385 2137
@@ -1403,17 +2155,18 @@ icmp6_match(const struct sk_buff *skb,
1403 unsigned int protoff, 2155 unsigned int protoff,
1404 bool *hotdrop) 2156 bool *hotdrop)
1405{ 2157{
1406 struct icmp6hdr _icmp, *ic; 2158 struct icmp6hdr _icmph, *ic;
1407 const struct ip6t_icmp *icmpinfo = matchinfo; 2159 const struct ip6t_icmp *icmpinfo = matchinfo;
1408 2160
1409 /* Must not be a fragment. */ 2161 /* Must not be a fragment. */
1410 if (offset) 2162 if (offset)
1411 return false; 2163 return false;
1412 2164
1413 ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); 2165 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1414 if (ic == NULL) { 2166 if (ic == NULL) {
1415 /* We've been asked to examine this packet, and we 2167 /* We've been asked to examine this packet, and we
1416 can't. Hence, no choice but to drop. */ 2168 * can't. Hence, no choice but to drop.
2169 */
1417 duprintf("Dropping evil ICMP tinygram.\n"); 2170 duprintf("Dropping evil ICMP tinygram.\n");
1418 *hotdrop = true; 2171 *hotdrop = true;
1419 return false; 2172 return false;
@@ -1445,6 +2198,11 @@ static struct xt_target ip6t_standard_target __read_mostly = {
1445 .name = IP6T_STANDARD_TARGET, 2198 .name = IP6T_STANDARD_TARGET,
1446 .targetsize = sizeof(int), 2199 .targetsize = sizeof(int),
1447 .family = AF_INET6, 2200 .family = AF_INET6,
2201#ifdef CONFIG_COMPAT
2202 .compatsize = sizeof(compat_int_t),
2203 .compat_from_user = compat_standard_from_user,
2204 .compat_to_user = compat_standard_to_user,
2205#endif
1448}; 2206};
1449 2207
1450static struct xt_target ip6t_error_target __read_mostly = { 2208static struct xt_target ip6t_error_target __read_mostly = {
@@ -1459,26 +2217,47 @@ static struct nf_sockopt_ops ip6t_sockopts = {
1459 .set_optmin = IP6T_BASE_CTL, 2217 .set_optmin = IP6T_BASE_CTL,
1460 .set_optmax = IP6T_SO_SET_MAX+1, 2218 .set_optmax = IP6T_SO_SET_MAX+1,
1461 .set = do_ip6t_set_ctl, 2219 .set = do_ip6t_set_ctl,
2220#ifdef CONFIG_COMPAT
2221 .compat_set = compat_do_ip6t_set_ctl,
2222#endif
1462 .get_optmin = IP6T_BASE_CTL, 2223 .get_optmin = IP6T_BASE_CTL,
1463 .get_optmax = IP6T_SO_GET_MAX+1, 2224 .get_optmax = IP6T_SO_GET_MAX+1,
1464 .get = do_ip6t_get_ctl, 2225 .get = do_ip6t_get_ctl,
2226#ifdef CONFIG_COMPAT
2227 .compat_get = compat_do_ip6t_get_ctl,
2228#endif
1465 .owner = THIS_MODULE, 2229 .owner = THIS_MODULE,
1466}; 2230};
1467 2231
1468static struct xt_match icmp6_matchstruct __read_mostly = { 2232static struct xt_match icmp6_matchstruct __read_mostly = {
1469 .name = "icmp6", 2233 .name = "icmp6",
1470 .match = &icmp6_match, 2234 .match = icmp6_match,
1471 .matchsize = sizeof(struct ip6t_icmp), 2235 .matchsize = sizeof(struct ip6t_icmp),
1472 .checkentry = icmp6_checkentry, 2236 .checkentry = icmp6_checkentry,
1473 .proto = IPPROTO_ICMPV6, 2237 .proto = IPPROTO_ICMPV6,
1474 .family = AF_INET6, 2238 .family = AF_INET6,
1475}; 2239};
1476 2240
2241static int __net_init ip6_tables_net_init(struct net *net)
2242{
2243 return xt_proto_init(net, AF_INET6);
2244}
2245
2246static void __net_exit ip6_tables_net_exit(struct net *net)
2247{
2248 xt_proto_fini(net, AF_INET6);
2249}
2250
2251static struct pernet_operations ip6_tables_net_ops = {
2252 .init = ip6_tables_net_init,
2253 .exit = ip6_tables_net_exit,
2254};
2255
1477static int __init ip6_tables_init(void) 2256static int __init ip6_tables_init(void)
1478{ 2257{
1479 int ret; 2258 int ret;
1480 2259
1481 ret = xt_proto_init(AF_INET6); 2260 ret = register_pernet_subsys(&ip6_tables_net_ops);
1482 if (ret < 0) 2261 if (ret < 0)
1483 goto err1; 2262 goto err1;
1484 2263
@@ -1508,7 +2287,7 @@ err4:
1508err3: 2287err3:
1509 xt_unregister_target(&ip6t_standard_target); 2288 xt_unregister_target(&ip6t_standard_target);
1510err2: 2289err2:
1511 xt_proto_fini(AF_INET6); 2290 unregister_pernet_subsys(&ip6_tables_net_ops);
1512err1: 2291err1:
1513 return ret; 2292 return ret;
1514} 2293}
@@ -1516,10 +2295,12 @@ err1:
1516static void __exit ip6_tables_fini(void) 2295static void __exit ip6_tables_fini(void)
1517{ 2296{
1518 nf_unregister_sockopt(&ip6t_sockopts); 2297 nf_unregister_sockopt(&ip6t_sockopts);
2298
1519 xt_unregister_match(&icmp6_matchstruct); 2299 xt_unregister_match(&icmp6_matchstruct);
1520 xt_unregister_target(&ip6t_error_target); 2300 xt_unregister_target(&ip6t_error_target);
1521 xt_unregister_target(&ip6t_standard_target); 2301 xt_unregister_target(&ip6t_standard_target);
1522 xt_proto_fini(AF_INET6); 2302
2303 unregister_pernet_subsys(&ip6_tables_net_ops);
1523} 2304}
1524 2305
1525/* 2306/*
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 9afc836fd454..d5f8fd5f29d3 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -15,15 +15,13 @@
15#include <linux/netfilter_ipv6/ip6t_HL.h> 15#include <linux/netfilter_ipv6/ip6t_HL.h>
16 16
17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); 17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
18MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); 18MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20 20
21static unsigned int ip6t_hl_target(struct sk_buff *skb, 21static unsigned int
22 const struct net_device *in, 22hl_tg6(struct sk_buff *skb, const struct net_device *in,
23 const struct net_device *out, 23 const struct net_device *out, unsigned int hooknum,
24 unsigned int hooknum, 24 const struct xt_target *target, const void *targinfo)
25 const struct xt_target *target,
26 const void *targinfo)
27{ 25{
28 struct ipv6hdr *ip6h; 26 struct ipv6hdr *ip6h;
29 const struct ip6t_HL_info *info = targinfo; 27 const struct ip6t_HL_info *info = targinfo;
@@ -58,11 +56,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb,
58 return XT_CONTINUE; 56 return XT_CONTINUE;
59} 57}
60 58
61static bool ip6t_hl_checkentry(const char *tablename, 59static bool
62 const void *entry, 60hl_tg6_check(const char *tablename, const void *entry,
63 const struct xt_target *target, 61 const struct xt_target *target, void *targinfo,
64 void *targinfo, 62 unsigned int hook_mask)
65 unsigned int hook_mask)
66{ 63{
67 const struct ip6t_HL_info *info = targinfo; 64 const struct ip6t_HL_info *info = targinfo;
68 65
@@ -79,25 +76,25 @@ static bool ip6t_hl_checkentry(const char *tablename,
79 return true; 76 return true;
80} 77}
81 78
82static struct xt_target ip6t_HL __read_mostly = { 79static struct xt_target hl_tg6_reg __read_mostly = {
83 .name = "HL", 80 .name = "HL",
84 .family = AF_INET6, 81 .family = AF_INET6,
85 .target = ip6t_hl_target, 82 .target = hl_tg6,
86 .targetsize = sizeof(struct ip6t_HL_info), 83 .targetsize = sizeof(struct ip6t_HL_info),
87 .table = "mangle", 84 .table = "mangle",
88 .checkentry = ip6t_hl_checkentry, 85 .checkentry = hl_tg6_check,
89 .me = THIS_MODULE 86 .me = THIS_MODULE
90}; 87};
91 88
92static int __init ip6t_hl_init(void) 89static int __init hl_tg6_init(void)
93{ 90{
94 return xt_register_target(&ip6t_HL); 91 return xt_register_target(&hl_tg6_reg);
95} 92}
96 93
97static void __exit ip6t_hl_fini(void) 94static void __exit hl_tg6_exit(void)
98{ 95{
99 xt_unregister_target(&ip6t_HL); 96 xt_unregister_target(&hl_tg6_reg);
100} 97}
101 98
102module_init(ip6t_hl_init); 99module_init(hl_tg6_init);
103module_exit(ip6t_hl_fini); 100module_exit(hl_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 7a48c342df46..86a613810b69 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,9 +23,10 @@
23#include <linux/netfilter.h> 23#include <linux/netfilter.h>
24#include <linux/netfilter/x_tables.h> 24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter_ipv6/ip6_tables.h> 25#include <linux/netfilter_ipv6/ip6_tables.h>
26#include <net/netfilter/nf_log.h>
26 27
27MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); 28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
28MODULE_DESCRIPTION("IP6 tables LOG target module"); 29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
29MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
30 31
31struct in_device; 32struct in_device;
@@ -362,7 +363,9 @@ static void dump_packet(const struct nf_loginfo *info,
362 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { 363 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
363 read_lock_bh(&skb->sk->sk_callback_lock); 364 read_lock_bh(&skb->sk->sk_callback_lock);
364 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 365 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
365 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 366 printk("UID=%u GID=%u",
367 skb->sk->sk_socket->file->f_uid,
368 skb->sk->sk_socket->file->f_gid);
366 read_unlock_bh(&skb->sk->sk_callback_lock); 369 read_unlock_bh(&skb->sk->sk_callback_lock);
367 } 370 }
368} 371}
@@ -431,12 +434,9 @@ ip6t_log_packet(unsigned int pf,
431} 434}
432 435
433static unsigned int 436static unsigned int
434ip6t_log_target(struct sk_buff *skb, 437log_tg6(struct sk_buff *skb, const struct net_device *in,
435 const struct net_device *in, 438 const struct net_device *out, unsigned int hooknum,
436 const struct net_device *out, 439 const struct xt_target *target, const void *targinfo)
437 unsigned int hooknum,
438 const struct xt_target *target,
439 const void *targinfo)
440{ 440{
441 const struct ip6t_log_info *loginfo = targinfo; 441 const struct ip6t_log_info *loginfo = targinfo;
442 struct nf_loginfo li; 442 struct nf_loginfo li;
@@ -450,11 +450,10 @@ ip6t_log_target(struct sk_buff *skb,
450} 450}
451 451
452 452
453static bool ip6t_log_checkentry(const char *tablename, 453static bool
454 const void *entry, 454log_tg6_check(const char *tablename, const void *entry,
455 const struct xt_target *target, 455 const struct xt_target *target, void *targinfo,
456 void *targinfo, 456 unsigned int hook_mask)
457 unsigned int hook_mask)
458{ 457{
459 const struct ip6t_log_info *loginfo = targinfo; 458 const struct ip6t_log_info *loginfo = targinfo;
460 459
@@ -470,37 +469,37 @@ static bool ip6t_log_checkentry(const char *tablename,
470 return true; 469 return true;
471} 470}
472 471
473static struct xt_target ip6t_log_reg __read_mostly = { 472static struct xt_target log_tg6_reg __read_mostly = {
474 .name = "LOG", 473 .name = "LOG",
475 .family = AF_INET6, 474 .family = AF_INET6,
476 .target = ip6t_log_target, 475 .target = log_tg6,
477 .targetsize = sizeof(struct ip6t_log_info), 476 .targetsize = sizeof(struct ip6t_log_info),
478 .checkentry = ip6t_log_checkentry, 477 .checkentry = log_tg6_check,
479 .me = THIS_MODULE, 478 .me = THIS_MODULE,
480}; 479};
481 480
482static struct nf_logger ip6t_logger = { 481static const struct nf_logger ip6t_logger = {
483 .name = "ip6t_LOG", 482 .name = "ip6t_LOG",
484 .logfn = &ip6t_log_packet, 483 .logfn = &ip6t_log_packet,
485 .me = THIS_MODULE, 484 .me = THIS_MODULE,
486}; 485};
487 486
488static int __init ip6t_log_init(void) 487static int __init log_tg6_init(void)
489{ 488{
490 int ret; 489 int ret;
491 490
492 ret = xt_register_target(&ip6t_log_reg); 491 ret = xt_register_target(&log_tg6_reg);
493 if (ret < 0) 492 if (ret < 0)
494 return ret; 493 return ret;
495 nf_log_register(PF_INET6, &ip6t_logger); 494 nf_log_register(PF_INET6, &ip6t_logger);
496 return 0; 495 return 0;
497} 496}
498 497
499static void __exit ip6t_log_fini(void) 498static void __exit log_tg6_exit(void)
500{ 499{
501 nf_log_unregister(&ip6t_logger); 500 nf_log_unregister(&ip6t_logger);
502 xt_unregister_target(&ip6t_log_reg); 501 xt_unregister_target(&log_tg6_reg);
503} 502}
504 503
505module_init(ip6t_log_init); 504module_init(log_tg6_init);
506module_exit(ip6t_log_fini); 505module_exit(log_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 1a7d2917545d..b23baa635fe0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -31,7 +31,7 @@
31#include <linux/netfilter_ipv6/ip6t_REJECT.h> 31#include <linux/netfilter_ipv6/ip6t_REJECT.h>
32 32
33MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); 33MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
34MODULE_DESCRIPTION("IP6 tables REJECT target module"); 34MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
35MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
36 36
37/* Send RST reply */ 37/* Send RST reply */
@@ -121,7 +121,6 @@ static void send_reset(struct sk_buff *oldskb)
121 ip6h->version = 6; 121 ip6h->version = 6;
122 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); 122 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
123 ip6h->nexthdr = IPPROTO_TCP; 123 ip6h->nexthdr = IPPROTO_TCP;
124 ip6h->payload_len = htons(sizeof(struct tcphdr));
125 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); 124 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
126 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); 125 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
127 126
@@ -159,25 +158,22 @@ static void send_reset(struct sk_buff *oldskb)
159 158
160 nf_ct_attach(nskb, oldskb); 159 nf_ct_attach(nskb, oldskb);
161 160
162 NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, 161 ip6_local_out(nskb);
163 dst_output);
164} 162}
165 163
166static inline void 164static inline void
167send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) 165send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
168{ 166{
169 if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) 167 if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
170 skb_in->dev = init_net.loopback_dev; 168 skb_in->dev = init_net.loopback_dev;
171 169
172 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); 170 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
173} 171}
174 172
175static unsigned int reject6_target(struct sk_buff *skb, 173static unsigned int
176 const struct net_device *in, 174reject_tg6(struct sk_buff *skb, const struct net_device *in,
177 const struct net_device *out, 175 const struct net_device *out, unsigned int hooknum,
178 unsigned int hooknum, 176 const struct xt_target *target, const void *targinfo)
179 const struct xt_target *target,
180 const void *targinfo)
181{ 177{
182 const struct ip6t_reject_info *reject = targinfo; 178 const struct ip6t_reject_info *reject = targinfo;
183 179
@@ -216,11 +212,10 @@ static unsigned int reject6_target(struct sk_buff *skb,
216 return NF_DROP; 212 return NF_DROP;
217} 213}
218 214
219static bool check(const char *tablename, 215static bool
220 const void *entry, 216reject_tg6_check(const char *tablename, const void *entry,
221 const struct xt_target *target, 217 const struct xt_target *target, void *targinfo,
222 void *targinfo, 218 unsigned int hook_mask)
223 unsigned int hook_mask)
224{ 219{
225 const struct ip6t_reject_info *rejinfo = targinfo; 220 const struct ip6t_reject_info *rejinfo = targinfo;
226 const struct ip6t_entry *e = entry; 221 const struct ip6t_entry *e = entry;
@@ -239,27 +234,27 @@ static bool check(const char *tablename,
239 return true; 234 return true;
240} 235}
241 236
242static struct xt_target ip6t_reject_reg __read_mostly = { 237static struct xt_target reject_tg6_reg __read_mostly = {
243 .name = "REJECT", 238 .name = "REJECT",
244 .family = AF_INET6, 239 .family = AF_INET6,
245 .target = reject6_target, 240 .target = reject_tg6,
246 .targetsize = sizeof(struct ip6t_reject_info), 241 .targetsize = sizeof(struct ip6t_reject_info),
247 .table = "filter", 242 .table = "filter",
248 .hooks = (1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | 243 .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
249 (1 << NF_IP6_LOCAL_OUT), 244 (1 << NF_INET_LOCAL_OUT),
250 .checkentry = check, 245 .checkentry = reject_tg6_check,
251 .me = THIS_MODULE 246 .me = THIS_MODULE
252}; 247};
253 248
254static int __init ip6t_reject_init(void) 249static int __init reject_tg6_init(void)
255{ 250{
256 return xt_register_target(&ip6t_reject_reg); 251 return xt_register_target(&reject_tg6_reg);
257} 252}
258 253
259static void __exit ip6t_reject_fini(void) 254static void __exit reject_tg6_exit(void)
260{ 255{
261 xt_unregister_target(&ip6t_reject_reg); 256 xt_unregister_target(&reject_tg6_reg);
262} 257}
263 258
264module_init(ip6t_reject_init); 259module_init(reject_tg6_init);
265module_exit(ip6t_reject_fini); 260module_exit(reject_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2a25fe25e0e0..429629fd63b6 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -20,7 +20,7 @@
20#include <linux/netfilter_ipv6/ip6t_ah.h> 20#include <linux/netfilter_ipv6/ip6t_ah.h>
21 21
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_DESCRIPTION("IPv6 AH match"); 23MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match");
24MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 24MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
25 25
26/* Returns 1 if the spi is matched by the range, 0 otherwise */ 26/* Returns 1 if the spi is matched by the range, 0 otherwise */
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
37} 37}
38 38
39static bool 39static bool
40match(const struct sk_buff *skb, 40ah_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 41 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 42 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
43 const struct xt_match *match,
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 43{
49 struct ip_auth_hdr _ah; 44 struct ip_auth_hdr _ah;
50 const struct ip_auth_hdr *ah; 45 const struct ip_auth_hdr *ah;
@@ -100,11 +95,9 @@ match(const struct sk_buff *skb,
100 95
101/* Called when user tries to insert an entry of this type. */ 96/* Called when user tries to insert an entry of this type. */
102static bool 97static bool
103checkentry(const char *tablename, 98ah_mt6_check(const char *tablename, const void *entry,
104 const void *entry, 99 const struct xt_match *match, void *matchinfo,
105 const struct xt_match *match, 100 unsigned int hook_mask)
106 void *matchinfo,
107 unsigned int hook_mask)
108{ 101{
109 const struct ip6t_ah *ahinfo = matchinfo; 102 const struct ip6t_ah *ahinfo = matchinfo;
110 103
@@ -115,24 +108,24 @@ checkentry(const char *tablename,
115 return true; 108 return true;
116} 109}
117 110
118static struct xt_match ah_match __read_mostly = { 111static struct xt_match ah_mt6_reg __read_mostly = {
119 .name = "ah", 112 .name = "ah",
120 .family = AF_INET6, 113 .family = AF_INET6,
121 .match = match, 114 .match = ah_mt6,
122 .matchsize = sizeof(struct ip6t_ah), 115 .matchsize = sizeof(struct ip6t_ah),
123 .checkentry = checkentry, 116 .checkentry = ah_mt6_check,
124 .me = THIS_MODULE, 117 .me = THIS_MODULE,
125}; 118};
126 119
127static int __init ip6t_ah_init(void) 120static int __init ah_mt6_init(void)
128{ 121{
129 return xt_register_match(&ah_match); 122 return xt_register_match(&ah_mt6_reg);
130} 123}
131 124
132static void __exit ip6t_ah_fini(void) 125static void __exit ah_mt6_exit(void)
133{ 126{
134 xt_unregister_match(&ah_match); 127 xt_unregister_match(&ah_mt6_reg);
135} 128}
136 129
137module_init(ip6t_ah_init); 130module_init(ah_mt6_init);
138module_exit(ip6t_ah_fini); 131module_exit(ah_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 34ba150bfe5d..8f331f12b2ec 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -15,19 +15,15 @@
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv6/ip6_tables.h> 16#include <linux/netfilter_ipv6/ip6_tables.h>
17 17
18MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); 18MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
21 21
22static bool 22static bool
23match(const struct sk_buff *skb, 23eui64_mt6(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 24 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 25 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 26 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 27{
32 unsigned char eui64[8]; 28 unsigned char eui64[8];
33 int i = 0; 29 int i = 0;
@@ -47,7 +43,7 @@ match(const struct sk_buff *skb,
47 memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3); 43 memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
48 eui64[3] = 0xff; 44 eui64[3] = 0xff;
49 eui64[4] = 0xfe; 45 eui64[4] = 0xfe;
50 eui64[0] |= 0x02; 46 eui64[0] ^= 0x02;
51 47
52 i = 0; 48 i = 0;
53 while (ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i] 49 while (ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i]
@@ -62,25 +58,25 @@ match(const struct sk_buff *skb,
62 return false; 58 return false;
63} 59}
64 60
65static struct xt_match eui64_match __read_mostly = { 61static struct xt_match eui64_mt6_reg __read_mostly = {
66 .name = "eui64", 62 .name = "eui64",
67 .family = AF_INET6, 63 .family = AF_INET6,
68 .match = match, 64 .match = eui64_mt6,
69 .matchsize = sizeof(int), 65 .matchsize = sizeof(int),
70 .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | 66 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
71 (1 << NF_IP6_FORWARD), 67 (1 << NF_INET_FORWARD),
72 .me = THIS_MODULE, 68 .me = THIS_MODULE,
73}; 69};
74 70
75static int __init ip6t_eui64_init(void) 71static int __init eui64_mt6_init(void)
76{ 72{
77 return xt_register_match(&eui64_match); 73 return xt_register_match(&eui64_mt6_reg);
78} 74}
79 75
80static void __exit ip6t_eui64_fini(void) 76static void __exit eui64_mt6_exit(void)
81{ 77{
82 xt_unregister_match(&eui64_match); 78 xt_unregister_match(&eui64_mt6_reg);
83} 79}
84 80
85module_init(ip6t_eui64_init); 81module_init(eui64_mt6_init);
86module_exit(ip6t_eui64_fini); 82module_exit(eui64_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 968aeba02073..e2bbc63dba5b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -19,7 +19,7 @@
19#include <linux/netfilter_ipv6/ip6t_frag.h> 19#include <linux/netfilter_ipv6/ip6t_frag.h>
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_DESCRIPTION("IPv6 FRAG match"); 22MODULE_DESCRIPTION("Xtables: IPv6 fragment match");
23MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 23MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
24 24
25/* Returns 1 if the id is matched by the range, 0 otherwise */ 25/* Returns 1 if the id is matched by the range, 0 otherwise */
@@ -35,14 +35,10 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
35} 35}
36 36
37static bool 37static bool
38match(const struct sk_buff *skb, 38frag_mt6(const struct sk_buff *skb, const struct net_device *in,
39 const struct net_device *in, 39 const struct net_device *out, const struct xt_match *match,
40 const struct net_device *out, 40 const void *matchinfo, int offset, unsigned int protoff,
41 const struct xt_match *match, 41 bool *hotdrop)
42 const void *matchinfo,
43 int offset,
44 unsigned int protoff,
45 bool *hotdrop)
46{ 42{
47 struct frag_hdr _frag; 43 struct frag_hdr _frag;
48 const struct frag_hdr *fh; 44 const struct frag_hdr *fh;
@@ -116,11 +112,9 @@ match(const struct sk_buff *skb,
116 112
117/* Called when user tries to insert an entry of this type. */ 113/* Called when user tries to insert an entry of this type. */
118static bool 114static bool
119checkentry(const char *tablename, 115frag_mt6_check(const char *tablename, const void *ip,
120 const void *ip, 116 const struct xt_match *match, void *matchinfo,
121 const struct xt_match *match, 117 unsigned int hook_mask)
122 void *matchinfo,
123 unsigned int hook_mask)
124{ 118{
125 const struct ip6t_frag *fraginfo = matchinfo; 119 const struct ip6t_frag *fraginfo = matchinfo;
126 120
@@ -131,24 +125,24 @@ checkentry(const char *tablename,
131 return true; 125 return true;
132} 126}
133 127
134static struct xt_match frag_match __read_mostly = { 128static struct xt_match frag_mt6_reg __read_mostly = {
135 .name = "frag", 129 .name = "frag",
136 .family = AF_INET6, 130 .family = AF_INET6,
137 .match = match, 131 .match = frag_mt6,
138 .matchsize = sizeof(struct ip6t_frag), 132 .matchsize = sizeof(struct ip6t_frag),
139 .checkentry = checkentry, 133 .checkentry = frag_mt6_check,
140 .me = THIS_MODULE, 134 .me = THIS_MODULE,
141}; 135};
142 136
143static int __init ip6t_frag_init(void) 137static int __init frag_mt6_init(void)
144{ 138{
145 return xt_register_match(&frag_match); 139 return xt_register_match(&frag_mt6_reg);
146} 140}
147 141
148static void __exit ip6t_frag_fini(void) 142static void __exit frag_mt6_exit(void)
149{ 143{
150 xt_unregister_match(&frag_match); 144 xt_unregister_match(&frag_mt6_reg);
151} 145}
152 146
153module_init(ip6t_frag_init); 147module_init(frag_mt6_init);
154module_exit(ip6t_frag_fini); 148module_exit(frag_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e6ca6018b1ea..62e39ace0588 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv6/ip6t_opts.h> 21#include <linux/netfilter_ipv6/ip6t_opts.h>
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_DESCRIPTION("IPv6 opts match"); 24MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match");
25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
26MODULE_ALIAS("ip6t_dst"); 26MODULE_ALIAS("ip6t_dst");
27 27
@@ -42,14 +42,10 @@ MODULE_ALIAS("ip6t_dst");
42 */ 42 */
43 43
44static bool 44static bool
45match(const struct sk_buff *skb, 45hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
46 const struct net_device *in, 46 const struct net_device *out, const struct xt_match *match,
47 const struct net_device *out, 47 const void *matchinfo, int offset, unsigned int protoff,
48 const struct xt_match *match, 48 bool *hotdrop)
49 const void *matchinfo,
50 int offset,
51 unsigned int protoff,
52 bool *hotdrop)
53{ 49{
54 struct ipv6_opt_hdr _optsh; 50 struct ipv6_opt_hdr _optsh;
55 const struct ipv6_opt_hdr *oh; 51 const struct ipv6_opt_hdr *oh;
@@ -171,11 +167,9 @@ match(const struct sk_buff *skb,
171 167
172/* Called when user tries to insert an entry of this type. */ 168/* Called when user tries to insert an entry of this type. */
173static bool 169static bool
174checkentry(const char *tablename, 170hbh_mt6_check(const char *tablename, const void *entry,
175 const void *entry, 171 const struct xt_match *match, void *matchinfo,
176 const struct xt_match *match, 172 unsigned int hook_mask)
177 void *matchinfo,
178 unsigned int hook_mask)
179{ 173{
180 const struct ip6t_opts *optsinfo = matchinfo; 174 const struct ip6t_opts *optsinfo = matchinfo;
181 175
@@ -186,36 +180,36 @@ checkentry(const char *tablename,
186 return true; 180 return true;
187} 181}
188 182
189static struct xt_match opts_match[] __read_mostly = { 183static struct xt_match hbh_mt6_reg[] __read_mostly = {
190 { 184 {
191 .name = "hbh", 185 .name = "hbh",
192 .family = AF_INET6, 186 .family = AF_INET6,
193 .match = match, 187 .match = hbh_mt6,
194 .matchsize = sizeof(struct ip6t_opts), 188 .matchsize = sizeof(struct ip6t_opts),
195 .checkentry = checkentry, 189 .checkentry = hbh_mt6_check,
196 .me = THIS_MODULE, 190 .me = THIS_MODULE,
197 .data = NEXTHDR_HOP, 191 .data = NEXTHDR_HOP,
198 }, 192 },
199 { 193 {
200 .name = "dst", 194 .name = "dst",
201 .family = AF_INET6, 195 .family = AF_INET6,
202 .match = match, 196 .match = hbh_mt6,
203 .matchsize = sizeof(struct ip6t_opts), 197 .matchsize = sizeof(struct ip6t_opts),
204 .checkentry = checkentry, 198 .checkentry = hbh_mt6_check,
205 .me = THIS_MODULE, 199 .me = THIS_MODULE,
206 .data = NEXTHDR_DEST, 200 .data = NEXTHDR_DEST,
207 }, 201 },
208}; 202};
209 203
210static int __init ip6t_hbh_init(void) 204static int __init hbh_mt6_init(void)
211{ 205{
212 return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); 206 return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
213} 207}
214 208
215static void __exit ip6t_hbh_fini(void) 209static void __exit hbh_mt6_exit(void)
216{ 210{
217 xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); 211 xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
218} 212}
219 213
220module_init(ip6t_hbh_init); 214module_init(hbh_mt6_init);
221module_exit(ip6t_hbh_fini); 215module_exit(hbh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index ca29ec00dc18..345671673845 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -16,13 +16,13 @@
16#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
17 17
18MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); 18MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
19MODULE_DESCRIPTION("IP tables Hop Limit matching module"); 19MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21 21
22static bool match(const struct sk_buff *skb, 22static bool
23 const struct net_device *in, const struct net_device *out, 23hl_mt6(const struct sk_buff *skb, const struct net_device *in,
24 const struct xt_match *match, const void *matchinfo, 24 const struct net_device *out, const struct xt_match *match,
25 int offset, unsigned int protoff, bool *hotdrop) 25 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
26{ 26{
27 const struct ip6t_hl_info *info = matchinfo; 27 const struct ip6t_hl_info *info = matchinfo;
28 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 28 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -49,23 +49,23 @@ static bool match(const struct sk_buff *skb,
49 return false; 49 return false;
50} 50}
51 51
52static struct xt_match hl_match __read_mostly = { 52static struct xt_match hl_mt6_reg __read_mostly = {
53 .name = "hl", 53 .name = "hl",
54 .family = AF_INET6, 54 .family = AF_INET6,
55 .match = match, 55 .match = hl_mt6,
56 .matchsize = sizeof(struct ip6t_hl_info), 56 .matchsize = sizeof(struct ip6t_hl_info),
57 .me = THIS_MODULE, 57 .me = THIS_MODULE,
58}; 58};
59 59
60static int __init ip6t_hl_init(void) 60static int __init hl_mt6_init(void)
61{ 61{
62 return xt_register_match(&hl_match); 62 return xt_register_match(&hl_mt6_reg);
63} 63}
64 64
65static void __exit ip6t_hl_fini(void) 65static void __exit hl_mt6_exit(void)
66{ 66{
67 xt_unregister_match(&hl_match); 67 xt_unregister_match(&hl_mt6_reg);
68} 68}
69 69
70module_init(ip6t_hl_init); 70module_init(hl_mt6_init);
71module_exit(ip6t_hl_fini); 71module_exit(hl_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 2c65c2f9a4ab..3a940171f829 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -23,18 +23,14 @@
23#include <linux/netfilter_ipv6/ip6t_ipv6header.h> 23#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
24 24
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26MODULE_DESCRIPTION("IPv6 headers match"); 26MODULE_DESCRIPTION("Xtables: IPv6 header types match");
27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
28 28
29static bool 29static bool
30ipv6header_match(const struct sk_buff *skb, 30ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in,
31 const struct net_device *in, 31 const struct net_device *out, const struct xt_match *match,
32 const struct net_device *out, 32 const void *matchinfo, int offset, unsigned int protoff,
33 const struct xt_match *match, 33 bool *hotdrop)
34 const void *matchinfo,
35 int offset,
36 unsigned int protoff,
37 bool *hotdrop)
38{ 34{
39 const struct ip6t_ipv6header_info *info = matchinfo; 35 const struct ip6t_ipv6header_info *info = matchinfo;
40 unsigned int temp; 36 unsigned int temp;
@@ -125,11 +121,9 @@ ipv6header_match(const struct sk_buff *skb,
125} 121}
126 122
127static bool 123static bool
128ipv6header_checkentry(const char *tablename, 124ipv6header_mt6_check(const char *tablename, const void *ip,
129 const void *ip, 125 const struct xt_match *match, void *matchinfo,
130 const struct xt_match *match, 126 unsigned int hook_mask)
131 void *matchinfo,
132 unsigned int hook_mask)
133{ 127{
134 const struct ip6t_ipv6header_info *info = matchinfo; 128 const struct ip6t_ipv6header_info *info = matchinfo;
135 129
@@ -141,25 +135,25 @@ ipv6header_checkentry(const char *tablename,
141 return true; 135 return true;
142} 136}
143 137
144static struct xt_match ip6t_ipv6header_match __read_mostly = { 138static struct xt_match ipv6header_mt6_reg __read_mostly = {
145 .name = "ipv6header", 139 .name = "ipv6header",
146 .family = AF_INET6, 140 .family = AF_INET6,
147 .match = &ipv6header_match, 141 .match = ipv6header_mt6,
148 .matchsize = sizeof(struct ip6t_ipv6header_info), 142 .matchsize = sizeof(struct ip6t_ipv6header_info),
149 .checkentry = &ipv6header_checkentry, 143 .checkentry = ipv6header_mt6_check,
150 .destroy = NULL, 144 .destroy = NULL,
151 .me = THIS_MODULE, 145 .me = THIS_MODULE,
152}; 146};
153 147
154static int __init ipv6header_init(void) 148static int __init ipv6header_mt6_init(void)
155{ 149{
156 return xt_register_match(&ip6t_ipv6header_match); 150 return xt_register_match(&ipv6header_mt6_reg);
157} 151}
158 152
159static void __exit ipv6header_exit(void) 153static void __exit ipv6header_mt6_exit(void)
160{ 154{
161 xt_unregister_match(&ip6t_ipv6header_match); 155 xt_unregister_match(&ipv6header_mt6_reg);
162} 156}
163 157
164module_init(ipv6header_init); 158module_init(ipv6header_mt6_init);
165module_exit(ipv6header_exit); 159module_exit(ipv6header_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0fa714092dc9..e06678d07ec8 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter/x_tables.h> 21#include <linux/netfilter/x_tables.h>
22#include <linux/netfilter_ipv6/ip6t_mh.h> 22#include <linux/netfilter_ipv6/ip6t_mh.h>
23 23
24MODULE_DESCRIPTION("ip6t_tables match for MH"); 24MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26 26
27#ifdef DEBUG_IP_FIREWALL_USER 27#ifdef DEBUG_IP_FIREWALL_USER
@@ -38,14 +38,9 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
38} 38}
39 39
40static bool 40static bool
41match(const struct sk_buff *skb, 41mh_mt6(const struct sk_buff *skb, const struct net_device *in,
42 const struct net_device *in, 42 const struct net_device *out, const struct xt_match *match,
43 const struct net_device *out, 43 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
44 const struct xt_match *match,
45 const void *matchinfo,
46 int offset,
47 unsigned int protoff,
48 bool *hotdrop)
49{ 44{
50 struct ip6_mh _mh; 45 struct ip6_mh _mh;
51 const struct ip6_mh *mh; 46 const struct ip6_mh *mh;
@@ -77,11 +72,9 @@ match(const struct sk_buff *skb,
77 72
78/* Called when user tries to insert an entry of this type. */ 73/* Called when user tries to insert an entry of this type. */
79static bool 74static bool
80mh_checkentry(const char *tablename, 75mh_mt6_check(const char *tablename, const void *entry,
81 const void *entry, 76 const struct xt_match *match, void *matchinfo,
82 const struct xt_match *match, 77 unsigned int hook_mask)
83 void *matchinfo,
84 unsigned int hook_mask)
85{ 78{
86 const struct ip6t_mh *mhinfo = matchinfo; 79 const struct ip6t_mh *mhinfo = matchinfo;
87 80
@@ -89,25 +82,25 @@ mh_checkentry(const char *tablename,
89 return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); 82 return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
90} 83}
91 84
92static struct xt_match mh_match __read_mostly = { 85static struct xt_match mh_mt6_reg __read_mostly = {
93 .name = "mh", 86 .name = "mh",
94 .family = AF_INET6, 87 .family = AF_INET6,
95 .checkentry = mh_checkentry, 88 .checkentry = mh_mt6_check,
96 .match = match, 89 .match = mh_mt6,
97 .matchsize = sizeof(struct ip6t_mh), 90 .matchsize = sizeof(struct ip6t_mh),
98 .proto = IPPROTO_MH, 91 .proto = IPPROTO_MH,
99 .me = THIS_MODULE, 92 .me = THIS_MODULE,
100}; 93};
101 94
102static int __init ip6t_mh_init(void) 95static int __init mh_mt6_init(void)
103{ 96{
104 return xt_register_match(&mh_match); 97 return xt_register_match(&mh_mt6_reg);
105} 98}
106 99
107static void __exit ip6t_mh_fini(void) 100static void __exit mh_mt6_exit(void)
108{ 101{
109 xt_unregister_match(&mh_match); 102 xt_unregister_match(&mh_mt6_reg);
110} 103}
111 104
112module_init(ip6t_mh_init); 105module_init(mh_mt6_init);
113module_exit(ip6t_mh_fini); 106module_exit(mh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
deleted file mode 100644
index 6036613aef36..000000000000
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ /dev/null
@@ -1,92 +0,0 @@
1/* Kernel module to match various things tied to sockets associated with
2 locally generated outgoing packets. */
3
4/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/file.h>
14#include <linux/rcupdate.h>
15#include <net/sock.h>
16
17#include <linux/netfilter_ipv6/ip6t_owner.h>
18#include <linux/netfilter_ipv6/ip6_tables.h>
19#include <linux/netfilter/x_tables.h>
20
21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
22MODULE_DESCRIPTION("IP6 tables owner matching module");
23MODULE_LICENSE("GPL");
24
25
26static bool
27match(const struct sk_buff *skb,
28 const struct net_device *in,
29 const struct net_device *out,
30 const struct xt_match *match,
31 const void *matchinfo,
32 int offset,
33 unsigned int protoff,
34 bool *hotdrop)
35{
36 const struct ip6t_owner_info *info = matchinfo;
37
38 if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
39 return false;
40
41 if (info->match & IP6T_OWNER_UID)
42 if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
43 !!(info->invert & IP6T_OWNER_UID))
44 return false;
45
46 if (info->match & IP6T_OWNER_GID)
47 if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
48 !!(info->invert & IP6T_OWNER_GID))
49 return false;
50
51 return true;
52}
53
54static bool
55checkentry(const char *tablename,
56 const void *ip,
57 const struct xt_match *match,
58 void *matchinfo,
59 unsigned int hook_mask)
60{
61 const struct ip6t_owner_info *info = matchinfo;
62
63 if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
64 printk("ipt_owner: pid and sid matching "
65 "not supported anymore\n");
66 return false;
67 }
68 return true;
69}
70
71static struct xt_match owner_match __read_mostly = {
72 .name = "owner",
73 .family = AF_INET6,
74 .match = match,
75 .matchsize = sizeof(struct ip6t_owner_info),
76 .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING),
77 .checkentry = checkentry,
78 .me = THIS_MODULE,
79};
80
81static int __init ip6t_owner_init(void)
82{
83 return xt_register_match(&owner_match);
84}
85
86static void __exit ip6t_owner_fini(void)
87{
88 xt_unregister_match(&owner_match);
89}
90
91module_init(ip6t_owner_init);
92module_exit(ip6t_owner_fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 357cea703bd9..12a9efe9886e 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv6/ip6t_rt.h> 21#include <linux/netfilter_ipv6/ip6t_rt.h>
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_DESCRIPTION("IPv6 RT match"); 24MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match");
25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
26 26
27/* Returns 1 if the id is matched by the range, 0 otherwise */ 27/* Returns 1 if the id is matched by the range, 0 otherwise */
@@ -37,14 +37,9 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
37} 37}
38 38
39static bool 39static bool
40match(const struct sk_buff *skb, 40rt_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 41 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 42 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
43 const struct xt_match *match,
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 43{
49 struct ipv6_rt_hdr _route; 44 struct ipv6_rt_hdr _route;
50 const struct ipv6_rt_hdr *rh; 45 const struct ipv6_rt_hdr *rh;
@@ -195,11 +190,9 @@ match(const struct sk_buff *skb,
195 190
196/* Called when user tries to insert an entry of this type. */ 191/* Called when user tries to insert an entry of this type. */
197static bool 192static bool
198checkentry(const char *tablename, 193rt_mt6_check(const char *tablename, const void *entry,
199 const void *entry, 194 const struct xt_match *match, void *matchinfo,
200 const struct xt_match *match, 195 unsigned int hook_mask)
201 void *matchinfo,
202 unsigned int hook_mask)
203{ 196{
204 const struct ip6t_rt *rtinfo = matchinfo; 197 const struct ip6t_rt *rtinfo = matchinfo;
205 198
@@ -218,24 +211,24 @@ checkentry(const char *tablename,
218 return true; 211 return true;
219} 212}
220 213
221static struct xt_match rt_match __read_mostly = { 214static struct xt_match rt_mt6_reg __read_mostly = {
222 .name = "rt", 215 .name = "rt",
223 .family = AF_INET6, 216 .family = AF_INET6,
224 .match = match, 217 .match = rt_mt6,
225 .matchsize = sizeof(struct ip6t_rt), 218 .matchsize = sizeof(struct ip6t_rt),
226 .checkentry = checkentry, 219 .checkentry = rt_mt6_check,
227 .me = THIS_MODULE, 220 .me = THIS_MODULE,
228}; 221};
229 222
230static int __init ip6t_rt_init(void) 223static int __init rt_mt6_init(void)
231{ 224{
232 return xt_register_match(&rt_match); 225 return xt_register_match(&rt_mt6_reg);
233} 226}
234 227
235static void __exit ip6t_rt_fini(void) 228static void __exit rt_mt6_exit(void)
236{ 229{
237 xt_unregister_match(&rt_match); 230 xt_unregister_match(&rt_mt6_reg);
238} 231}
239 232
240module_init(ip6t_rt_init); 233module_init(rt_mt6_init);
241module_exit(ip6t_rt_fini); 234module_exit(rt_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1d26b202bf30..2d9cd095a72c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -17,28 +17,30 @@ MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
18MODULE_DESCRIPTION("ip6tables filter table"); 18MODULE_DESCRIPTION("ip6tables filter table");
19 19
20#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) 20#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
21 (1 << NF_INET_FORWARD) | \
22 (1 << NF_INET_LOCAL_OUT))
21 23
22static struct 24static struct
23{ 25{
24 struct ip6t_replace repl; 26 struct ip6t_replace repl;
25 struct ip6t_standard entries[3]; 27 struct ip6t_standard entries[3];
26 struct ip6t_error term; 28 struct ip6t_error term;
27} initial_table __initdata = { 29} initial_table __net_initdata = {
28 .repl = { 30 .repl = {
29 .name = "filter", 31 .name = "filter",
30 .valid_hooks = FILTER_VALID_HOOKS, 32 .valid_hooks = FILTER_VALID_HOOKS,
31 .num_entries = 4, 33 .num_entries = 4,
32 .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), 34 .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
33 .hook_entry = { 35 .hook_entry = {
34 [NF_IP6_LOCAL_IN] = 0, 36 [NF_INET_LOCAL_IN] = 0,
35 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), 37 [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
36 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 38 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
37 }, 39 },
38 .underflow = { 40 .underflow = {
39 [NF_IP6_LOCAL_IN] = 0, 41 [NF_INET_LOCAL_IN] = 0,
40 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), 42 [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
41 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 43 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
42 }, 44 },
43 }, 45 },
44 .entries = { 46 .entries = {
@@ -65,7 +67,7 @@ ip6t_hook(unsigned int hook,
65 const struct net_device *out, 67 const struct net_device *out,
66 int (*okfn)(struct sk_buff *)) 68 int (*okfn)(struct sk_buff *))
67{ 69{
68 return ip6t_do_table(skb, hook, in, out, &packet_filter); 70 return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
69} 71}
70 72
71static unsigned int 73static unsigned int
@@ -85,29 +87,29 @@ ip6t_local_out_hook(unsigned int hook,
85 } 87 }
86#endif 88#endif
87 89
88 return ip6t_do_table(skb, hook, in, out, &packet_filter); 90 return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
89} 91}
90 92
91static struct nf_hook_ops ip6t_ops[] = { 93static struct nf_hook_ops ip6t_ops[] __read_mostly = {
92 { 94 {
93 .hook = ip6t_hook, 95 .hook = ip6t_hook,
94 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
95 .pf = PF_INET6, 97 .pf = PF_INET6,
96 .hooknum = NF_IP6_LOCAL_IN, 98 .hooknum = NF_INET_LOCAL_IN,
97 .priority = NF_IP6_PRI_FILTER, 99 .priority = NF_IP6_PRI_FILTER,
98 }, 100 },
99 { 101 {
100 .hook = ip6t_hook, 102 .hook = ip6t_hook,
101 .owner = THIS_MODULE, 103 .owner = THIS_MODULE,
102 .pf = PF_INET6, 104 .pf = PF_INET6,
103 .hooknum = NF_IP6_FORWARD, 105 .hooknum = NF_INET_FORWARD,
104 .priority = NF_IP6_PRI_FILTER, 106 .priority = NF_IP6_PRI_FILTER,
105 }, 107 },
106 { 108 {
107 .hook = ip6t_local_out_hook, 109 .hook = ip6t_local_out_hook,
108 .owner = THIS_MODULE, 110 .owner = THIS_MODULE,
109 .pf = PF_INET6, 111 .pf = PF_INET6,
110 .hooknum = NF_IP6_LOCAL_OUT, 112 .hooknum = NF_INET_LOCAL_OUT,
111 .priority = NF_IP6_PRI_FILTER, 113 .priority = NF_IP6_PRI_FILTER,
112 }, 114 },
113}; 115};
@@ -116,6 +118,26 @@ static struct nf_hook_ops ip6t_ops[] = {
116static int forward = NF_ACCEPT; 118static int forward = NF_ACCEPT;
117module_param(forward, bool, 0000); 119module_param(forward, bool, 0000);
118 120
121static int __net_init ip6table_filter_net_init(struct net *net)
122{
123 /* Register table */
124 net->ipv6.ip6table_filter =
125 ip6t_register_table(net, &packet_filter, &initial_table.repl);
126 if (IS_ERR(net->ipv6.ip6table_filter))
127 return PTR_ERR(net->ipv6.ip6table_filter);
128 return 0;
129}
130
131static void __net_exit ip6table_filter_net_exit(struct net *net)
132{
133 ip6t_unregister_table(net->ipv6.ip6table_filter);
134}
135
136static struct pernet_operations ip6table_filter_net_ops = {
137 .init = ip6table_filter_net_init,
138 .exit = ip6table_filter_net_exit,
139};
140
119static int __init ip6table_filter_init(void) 141static int __init ip6table_filter_init(void)
120{ 142{
121 int ret; 143 int ret;
@@ -128,8 +150,7 @@ static int __init ip6table_filter_init(void)
128 /* Entry 1 is the FORWARD hook */ 150 /* Entry 1 is the FORWARD hook */
129 initial_table.entries[1].target.verdict = -forward - 1; 151 initial_table.entries[1].target.verdict = -forward - 1;
130 152
131 /* Register table */ 153 ret = register_pernet_subsys(&ip6table_filter_net_ops);
132 ret = ip6t_register_table(&packet_filter, &initial_table.repl);
133 if (ret < 0) 154 if (ret < 0)
134 return ret; 155 return ret;
135 156
@@ -141,14 +162,14 @@ static int __init ip6table_filter_init(void)
141 return ret; 162 return ret;
142 163
143 cleanup_table: 164 cleanup_table:
144 ip6t_unregister_table(&packet_filter); 165 unregister_pernet_subsys(&ip6table_filter_net_ops);
145 return ret; 166 return ret;
146} 167}
147 168
148static void __exit ip6table_filter_fini(void) 169static void __exit ip6table_filter_fini(void)
149{ 170{
150 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); 171 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
151 ip6t_unregister_table(&packet_filter); 172 unregister_pernet_subsys(&ip6table_filter_net_ops);
152} 173}
153 174
154module_init(ip6table_filter_init); 175module_init(ip6table_filter_init);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a0b6381f1e8c..035343a90ffe 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -15,36 +15,36 @@ MODULE_LICENSE("GPL");
15MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 15MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
16MODULE_DESCRIPTION("ip6tables mangle table"); 16MODULE_DESCRIPTION("ip6tables mangle table");
17 17
18#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \ 18#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
19 (1 << NF_IP6_LOCAL_IN) | \ 19 (1 << NF_INET_LOCAL_IN) | \
20 (1 << NF_IP6_FORWARD) | \ 20 (1 << NF_INET_FORWARD) | \
21 (1 << NF_IP6_LOCAL_OUT) | \ 21 (1 << NF_INET_LOCAL_OUT) | \
22 (1 << NF_IP6_POST_ROUTING)) 22 (1 << NF_INET_POST_ROUTING))
23 23
24static struct 24static struct
25{ 25{
26 struct ip6t_replace repl; 26 struct ip6t_replace repl;
27 struct ip6t_standard entries[5]; 27 struct ip6t_standard entries[5];
28 struct ip6t_error term; 28 struct ip6t_error term;
29} initial_table __initdata = { 29} initial_table __net_initdata = {
30 .repl = { 30 .repl = {
31 .name = "mangle", 31 .name = "mangle",
32 .valid_hooks = MANGLE_VALID_HOOKS, 32 .valid_hooks = MANGLE_VALID_HOOKS,
33 .num_entries = 6, 33 .num_entries = 6,
34 .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), 34 .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
35 .hook_entry = { 35 .hook_entry = {
36 [NF_IP6_PRE_ROUTING] = 0, 36 [NF_INET_PRE_ROUTING] = 0,
37 [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), 37 [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard),
38 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, 38 [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2,
39 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, 39 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
40 [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, 40 [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
41 }, 41 },
42 .underflow = { 42 .underflow = {
43 [NF_IP6_PRE_ROUTING] = 0, 43 [NF_INET_PRE_ROUTING] = 0,
44 [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), 44 [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard),
45 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, 45 [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2,
46 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, 46 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
47 [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, 47 [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
48 }, 48 },
49 }, 49 },
50 .entries = { 50 .entries = {
@@ -73,7 +73,7 @@ ip6t_route_hook(unsigned int hook,
73 const struct net_device *out, 73 const struct net_device *out,
74 int (*okfn)(struct sk_buff *)) 74 int (*okfn)(struct sk_buff *))
75{ 75{
76 return ip6t_do_table(skb, hook, in, out, &packet_mangler); 76 return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle);
77} 77}
78 78
79static unsigned int 79static unsigned int
@@ -108,7 +108,7 @@ ip6t_local_hook(unsigned int hook,
108 /* flowlabel and prio (includes version, which shouldn't change either */ 108 /* flowlabel and prio (includes version, which shouldn't change either */
109 flowlabel = *((u_int32_t *)ipv6_hdr(skb)); 109 flowlabel = *((u_int32_t *)ipv6_hdr(skb));
110 110
111 ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); 111 ret = ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle);
112 112
113 if (ret != NF_DROP && ret != NF_STOLEN 113 if (ret != NF_DROP && ret != NF_STOLEN
114 && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) 114 && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
@@ -120,50 +120,69 @@ ip6t_local_hook(unsigned int hook,
120 return ret; 120 return ret;
121} 121}
122 122
123static struct nf_hook_ops ip6t_ops[] = { 123static struct nf_hook_ops ip6t_ops[] __read_mostly = {
124 { 124 {
125 .hook = ip6t_route_hook, 125 .hook = ip6t_route_hook,
126 .owner = THIS_MODULE, 126 .owner = THIS_MODULE,
127 .pf = PF_INET6, 127 .pf = PF_INET6,
128 .hooknum = NF_IP6_PRE_ROUTING, 128 .hooknum = NF_INET_PRE_ROUTING,
129 .priority = NF_IP6_PRI_MANGLE, 129 .priority = NF_IP6_PRI_MANGLE,
130 }, 130 },
131 { 131 {
132 .hook = ip6t_local_hook, 132 .hook = ip6t_local_hook,
133 .owner = THIS_MODULE, 133 .owner = THIS_MODULE,
134 .pf = PF_INET6, 134 .pf = PF_INET6,
135 .hooknum = NF_IP6_LOCAL_IN, 135 .hooknum = NF_INET_LOCAL_IN,
136 .priority = NF_IP6_PRI_MANGLE, 136 .priority = NF_IP6_PRI_MANGLE,
137 }, 137 },
138 { 138 {
139 .hook = ip6t_route_hook, 139 .hook = ip6t_route_hook,
140 .owner = THIS_MODULE, 140 .owner = THIS_MODULE,
141 .pf = PF_INET6, 141 .pf = PF_INET6,
142 .hooknum = NF_IP6_FORWARD, 142 .hooknum = NF_INET_FORWARD,
143 .priority = NF_IP6_PRI_MANGLE, 143 .priority = NF_IP6_PRI_MANGLE,
144 }, 144 },
145 { 145 {
146 .hook = ip6t_local_hook, 146 .hook = ip6t_local_hook,
147 .owner = THIS_MODULE, 147 .owner = THIS_MODULE,
148 .pf = PF_INET6, 148 .pf = PF_INET6,
149 .hooknum = NF_IP6_LOCAL_OUT, 149 .hooknum = NF_INET_LOCAL_OUT,
150 .priority = NF_IP6_PRI_MANGLE, 150 .priority = NF_IP6_PRI_MANGLE,
151 }, 151 },
152 { 152 {
153 .hook = ip6t_route_hook, 153 .hook = ip6t_route_hook,
154 .owner = THIS_MODULE, 154 .owner = THIS_MODULE,
155 .pf = PF_INET6, 155 .pf = PF_INET6,
156 .hooknum = NF_IP6_POST_ROUTING, 156 .hooknum = NF_INET_POST_ROUTING,
157 .priority = NF_IP6_PRI_MANGLE, 157 .priority = NF_IP6_PRI_MANGLE,
158 }, 158 },
159}; 159};
160 160
161static int __net_init ip6table_mangle_net_init(struct net *net)
162{
163 /* Register table */
164 net->ipv6.ip6table_mangle =
165 ip6t_register_table(net, &packet_mangler, &initial_table.repl);
166 if (IS_ERR(net->ipv6.ip6table_mangle))
167 return PTR_ERR(net->ipv6.ip6table_mangle);
168 return 0;
169}
170
171static void __net_exit ip6table_mangle_net_exit(struct net *net)
172{
173 ip6t_unregister_table(net->ipv6.ip6table_mangle);
174}
175
176static struct pernet_operations ip6table_mangle_net_ops = {
177 .init = ip6table_mangle_net_init,
178 .exit = ip6table_mangle_net_exit,
179};
180
161static int __init ip6table_mangle_init(void) 181static int __init ip6table_mangle_init(void)
162{ 182{
163 int ret; 183 int ret;
164 184
165 /* Register table */ 185 ret = register_pernet_subsys(&ip6table_mangle_net_ops);
166 ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
167 if (ret < 0) 186 if (ret < 0)
168 return ret; 187 return ret;
169 188
@@ -175,14 +194,14 @@ static int __init ip6table_mangle_init(void)
175 return ret; 194 return ret;
176 195
177 cleanup_table: 196 cleanup_table:
178 ip6t_unregister_table(&packet_mangler); 197 unregister_pernet_subsys(&ip6table_mangle_net_ops);
179 return ret; 198 return ret;
180} 199}
181 200
182static void __exit ip6table_mangle_fini(void) 201static void __exit ip6table_mangle_fini(void)
183{ 202{
184 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); 203 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
185 ip6t_unregister_table(&packet_mangler); 204 unregister_pernet_subsys(&ip6table_mangle_net_ops);
186} 205}
187 206
188module_init(ip6table_mangle_init); 207module_init(ip6table_mangle_init);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 8f7109f991e6..5cd84203abfe 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -6,26 +6,26 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/netfilter_ipv6/ip6_tables.h> 7#include <linux/netfilter_ipv6/ip6_tables.h>
8 8
9#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) 9#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
10 10
11static struct 11static struct
12{ 12{
13 struct ip6t_replace repl; 13 struct ip6t_replace repl;
14 struct ip6t_standard entries[2]; 14 struct ip6t_standard entries[2];
15 struct ip6t_error term; 15 struct ip6t_error term;
16} initial_table __initdata = { 16} initial_table __net_initdata = {
17 .repl = { 17 .repl = {
18 .name = "raw", 18 .name = "raw",
19 .valid_hooks = RAW_VALID_HOOKS, 19 .valid_hooks = RAW_VALID_HOOKS,
20 .num_entries = 3, 20 .num_entries = 3,
21 .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), 21 .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
22 .hook_entry = { 22 .hook_entry = {
23 [NF_IP6_PRE_ROUTING] = 0, 23 [NF_INET_PRE_ROUTING] = 0,
24 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) 24 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
25 }, 25 },
26 .underflow = { 26 .underflow = {
27 [NF_IP6_PRE_ROUTING] = 0, 27 [NF_INET_PRE_ROUTING] = 0,
28 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) 28 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
29 }, 29 },
30 }, 30 },
31 .entries = { 31 .entries = {
@@ -51,32 +51,51 @@ ip6t_hook(unsigned int hook,
51 const struct net_device *out, 51 const struct net_device *out,
52 int (*okfn)(struct sk_buff *)) 52 int (*okfn)(struct sk_buff *))
53{ 53{
54 return ip6t_do_table(skb, hook, in, out, &packet_raw); 54 return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_raw);
55} 55}
56 56
57static struct nf_hook_ops ip6t_ops[] = { 57static struct nf_hook_ops ip6t_ops[] __read_mostly = {
58 { 58 {
59 .hook = ip6t_hook, 59 .hook = ip6t_hook,
60 .pf = PF_INET6, 60 .pf = PF_INET6,
61 .hooknum = NF_IP6_PRE_ROUTING, 61 .hooknum = NF_INET_PRE_ROUTING,
62 .priority = NF_IP6_PRI_FIRST, 62 .priority = NF_IP6_PRI_FIRST,
63 .owner = THIS_MODULE, 63 .owner = THIS_MODULE,
64 }, 64 },
65 { 65 {
66 .hook = ip6t_hook, 66 .hook = ip6t_hook,
67 .pf = PF_INET6, 67 .pf = PF_INET6,
68 .hooknum = NF_IP6_LOCAL_OUT, 68 .hooknum = NF_INET_LOCAL_OUT,
69 .priority = NF_IP6_PRI_FIRST, 69 .priority = NF_IP6_PRI_FIRST,
70 .owner = THIS_MODULE, 70 .owner = THIS_MODULE,
71 }, 71 },
72}; 72};
73 73
74static int __net_init ip6table_raw_net_init(struct net *net)
75{
76 /* Register table */
77 net->ipv6.ip6table_raw =
78 ip6t_register_table(net, &packet_raw, &initial_table.repl);
79 if (IS_ERR(net->ipv6.ip6table_raw))
80 return PTR_ERR(net->ipv6.ip6table_raw);
81 return 0;
82}
83
84static void __net_exit ip6table_raw_net_exit(struct net *net)
85{
86 ip6t_unregister_table(net->ipv6.ip6table_raw);
87}
88
89static struct pernet_operations ip6table_raw_net_ops = {
90 .init = ip6table_raw_net_init,
91 .exit = ip6table_raw_net_exit,
92};
93
74static int __init ip6table_raw_init(void) 94static int __init ip6table_raw_init(void)
75{ 95{
76 int ret; 96 int ret;
77 97
78 /* Register table */ 98 ret = register_pernet_subsys(&ip6table_raw_net_ops);
79 ret = ip6t_register_table(&packet_raw, &initial_table.repl);
80 if (ret < 0) 99 if (ret < 0)
81 return ret; 100 return ret;
82 101
@@ -88,14 +107,14 @@ static int __init ip6table_raw_init(void)
88 return ret; 107 return ret;
89 108
90 cleanup_table: 109 cleanup_table:
91 ip6t_unregister_table(&packet_raw); 110 unregister_pernet_subsys(&ip6table_raw_net_ops);
92 return ret; 111 return ret;
93} 112}
94 113
95static void __exit ip6table_raw_fini(void) 114static void __exit ip6table_raw_fini(void)
96{ 115{
97 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); 116 nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
98 ip6t_unregister_table(&packet_raw); 117 unregister_pernet_subsys(&ip6table_raw_net_ops);
99} 118}
100 119
101module_init(ip6table_raw_init); 120module_init(ip6table_raw_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ad74bab05047..3717bdf34f6e 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -30,7 +30,8 @@
30static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 30static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
31 struct nf_conntrack_tuple *tuple) 31 struct nf_conntrack_tuple *tuple)
32{ 32{
33 u_int32_t _addrs[8], *ap; 33 const u_int32_t *ap;
34 u_int32_t _addrs[8];
34 35
35 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), 36 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
36 sizeof(_addrs), _addrs); 37 sizeof(_addrs), _addrs);
@@ -60,12 +61,6 @@ static int ipv6_print_tuple(struct seq_file *s,
60 NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); 61 NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
61} 62}
62 63
63static int ipv6_print_conntrack(struct seq_file *s,
64 const struct nf_conn *conntrack)
65{
66 return 0;
67}
68
69/* 64/*
70 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c 65 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
71 * 66 *
@@ -152,8 +147,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
152 int (*okfn)(struct sk_buff *)) 147 int (*okfn)(struct sk_buff *))
153{ 148{
154 struct nf_conn *ct; 149 struct nf_conn *ct;
155 struct nf_conn_help *help; 150 const struct nf_conn_help *help;
156 struct nf_conntrack_helper *helper; 151 const struct nf_conntrack_helper *helper;
157 enum ip_conntrack_info ctinfo; 152 enum ip_conntrack_info ctinfo;
158 unsigned int ret, protoff; 153 unsigned int ret, protoff;
159 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 154 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
@@ -258,80 +253,51 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
258 return ipv6_conntrack_in(hooknum, skb, in, out, okfn); 253 return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
259} 254}
260 255
261static struct nf_hook_ops ipv6_conntrack_ops[] = { 256static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
262 { 257 {
263 .hook = ipv6_defrag, 258 .hook = ipv6_defrag,
264 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
265 .pf = PF_INET6, 260 .pf = PF_INET6,
266 .hooknum = NF_IP6_PRE_ROUTING, 261 .hooknum = NF_INET_PRE_ROUTING,
267 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, 262 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
268 }, 263 },
269 { 264 {
270 .hook = ipv6_conntrack_in, 265 .hook = ipv6_conntrack_in,
271 .owner = THIS_MODULE, 266 .owner = THIS_MODULE,
272 .pf = PF_INET6, 267 .pf = PF_INET6,
273 .hooknum = NF_IP6_PRE_ROUTING, 268 .hooknum = NF_INET_PRE_ROUTING,
274 .priority = NF_IP6_PRI_CONNTRACK, 269 .priority = NF_IP6_PRI_CONNTRACK,
275 }, 270 },
276 { 271 {
277 .hook = ipv6_conntrack_local, 272 .hook = ipv6_conntrack_local,
278 .owner = THIS_MODULE, 273 .owner = THIS_MODULE,
279 .pf = PF_INET6, 274 .pf = PF_INET6,
280 .hooknum = NF_IP6_LOCAL_OUT, 275 .hooknum = NF_INET_LOCAL_OUT,
281 .priority = NF_IP6_PRI_CONNTRACK, 276 .priority = NF_IP6_PRI_CONNTRACK,
282 }, 277 },
283 { 278 {
284 .hook = ipv6_defrag, 279 .hook = ipv6_defrag,
285 .owner = THIS_MODULE, 280 .owner = THIS_MODULE,
286 .pf = PF_INET6, 281 .pf = PF_INET6,
287 .hooknum = NF_IP6_LOCAL_OUT, 282 .hooknum = NF_INET_LOCAL_OUT,
288 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, 283 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
289 }, 284 },
290 { 285 {
291 .hook = ipv6_confirm, 286 .hook = ipv6_confirm,
292 .owner = THIS_MODULE, 287 .owner = THIS_MODULE,
293 .pf = PF_INET6, 288 .pf = PF_INET6,
294 .hooknum = NF_IP6_POST_ROUTING, 289 .hooknum = NF_INET_POST_ROUTING,
295 .priority = NF_IP6_PRI_LAST, 290 .priority = NF_IP6_PRI_LAST,
296 }, 291 },
297 { 292 {
298 .hook = ipv6_confirm, 293 .hook = ipv6_confirm,
299 .owner = THIS_MODULE, 294 .owner = THIS_MODULE,
300 .pf = PF_INET6, 295 .pf = PF_INET6,
301 .hooknum = NF_IP6_LOCAL_IN, 296 .hooknum = NF_INET_LOCAL_IN,
302 .priority = NF_IP6_PRI_LAST-1, 297 .priority = NF_IP6_PRI_LAST-1,
303 }, 298 },
304}; 299};
305 300
306#ifdef CONFIG_SYSCTL
307static ctl_table nf_ct_ipv6_sysctl_table[] = {
308 {
309 .procname = "nf_conntrack_frag6_timeout",
310 .data = &nf_frags_ctl.timeout,
311 .maxlen = sizeof(unsigned int),
312 .mode = 0644,
313 .proc_handler = &proc_dointvec_jiffies,
314 },
315 {
316 .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
317 .procname = "nf_conntrack_frag6_low_thresh",
318 .data = &nf_frags_ctl.low_thresh,
319 .maxlen = sizeof(unsigned int),
320 .mode = 0644,
321 .proc_handler = &proc_dointvec,
322 },
323 {
324 .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
325 .procname = "nf_conntrack_frag6_high_thresh",
326 .data = &nf_frags_ctl.high_thresh,
327 .maxlen = sizeof(unsigned int),
328 .mode = 0644,
329 .proc_handler = &proc_dointvec,
330 },
331 { .ctl_name = 0 }
332};
333#endif
334
335#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 301#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
336 302
337#include <linux/netfilter/nfnetlink.h> 303#include <linux/netfilter/nfnetlink.h>
@@ -376,7 +342,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
376 .pkt_to_tuple = ipv6_pkt_to_tuple, 342 .pkt_to_tuple = ipv6_pkt_to_tuple,
377 .invert_tuple = ipv6_invert_tuple, 343 .invert_tuple = ipv6_invert_tuple,
378 .print_tuple = ipv6_print_tuple, 344 .print_tuple = ipv6_print_tuple,
379 .print_conntrack = ipv6_print_conntrack,
380 .get_l4proto = ipv6_get_l4proto, 345 .get_l4proto = ipv6_get_l4proto,
381#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 346#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
382 .tuple_to_nlattr = ipv6_tuple_to_nlattr, 347 .tuple_to_nlattr = ipv6_tuple_to_nlattr,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fd9123f3dc04..0897d0f4c4a2 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -24,6 +24,7 @@
24#include <net/netfilter/nf_conntrack_l4proto.h> 24#include <net/netfilter/nf_conntrack_l4proto.h>
25#include <net/netfilter/nf_conntrack_core.h> 25#include <net/netfilter/nf_conntrack_core.h>
26#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 26#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
27#include <net/netfilter/nf_log.h>
27 28
28static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; 29static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
29 30
@@ -31,7 +32,8 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
31 unsigned int dataoff, 32 unsigned int dataoff,
32 struct nf_conntrack_tuple *tuple) 33 struct nf_conntrack_tuple *tuple)
33{ 34{
34 struct icmp6hdr _hdr, *hp; 35 const struct icmp6hdr *hp;
36 struct icmp6hdr _hdr;
35 37
36 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 38 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
37 if (hp == NULL) 39 if (hp == NULL)
@@ -44,7 +46,7 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
44} 46}
45 47
46/* Add 1; spaces filled with 0. */ 48/* Add 1; spaces filled with 0. */
47static u_int8_t invmap[] = { 49static const u_int8_t invmap[] = {
48 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, 50 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
49 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, 51 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
50 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, 52 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
@@ -74,13 +76,6 @@ static int icmpv6_print_tuple(struct seq_file *s,
74 ntohs(tuple->src.u.icmp.id)); 76 ntohs(tuple->src.u.icmp.id));
75} 77}
76 78
77/* Print out the private part of the conntrack. */
78static int icmpv6_print_conntrack(struct seq_file *s,
79 const struct nf_conn *conntrack)
80{
81 return 0;
82}
83
84/* Returns verdict for packet, or -1 for invalid. */ 79/* Returns verdict for packet, or -1 for invalid. */
85static int icmpv6_packet(struct nf_conn *ct, 80static int icmpv6_packet(struct nf_conn *ct,
86 const struct sk_buff *skb, 81 const struct sk_buff *skb,
@@ -107,24 +102,24 @@ static int icmpv6_packet(struct nf_conn *ct,
107} 102}
108 103
109/* Called when a new connection for this protocol found. */ 104/* Called when a new connection for this protocol found. */
110static int icmpv6_new(struct nf_conn *conntrack, 105static int icmpv6_new(struct nf_conn *ct,
111 const struct sk_buff *skb, 106 const struct sk_buff *skb,
112 unsigned int dataoff) 107 unsigned int dataoff)
113{ 108{
114 static u_int8_t valid_new[] = { 109 static const u_int8_t valid_new[] = {
115 [ICMPV6_ECHO_REQUEST - 128] = 1, 110 [ICMPV6_ECHO_REQUEST - 128] = 1,
116 [ICMPV6_NI_QUERY - 128] = 1 111 [ICMPV6_NI_QUERY - 128] = 1
117 }; 112 };
118 int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128; 113 int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
119 114
120 if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { 115 if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
121 /* Can't create a new ICMPv6 `conn' with this. */ 116 /* Can't create a new ICMPv6 `conn' with this. */
122 pr_debug("icmpv6: can't create new conn with type %u\n", 117 pr_debug("icmpv6: can't create new conn with type %u\n",
123 type + 128); 118 type + 128);
124 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); 119 NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
125 return 0; 120 return 0;
126 } 121 }
127 atomic_set(&conntrack->proto.icmp.count, 0); 122 atomic_set(&ct->proto.icmp.count, 0);
128 return 1; 123 return 1;
129} 124}
130 125
@@ -135,8 +130,8 @@ icmpv6_error_message(struct sk_buff *skb,
135 unsigned int hooknum) 130 unsigned int hooknum)
136{ 131{
137 struct nf_conntrack_tuple intuple, origtuple; 132 struct nf_conntrack_tuple intuple, origtuple;
138 struct nf_conntrack_tuple_hash *h; 133 const struct nf_conntrack_tuple_hash *h;
139 struct nf_conntrack_l4proto *inproto; 134 const struct nf_conntrack_l4proto *inproto;
140 135
141 NF_CT_ASSERT(skb->nfct == NULL); 136 NF_CT_ASSERT(skb->nfct == NULL);
142 137
@@ -182,7 +177,8 @@ static int
182icmpv6_error(struct sk_buff *skb, unsigned int dataoff, 177icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
183 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) 178 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
184{ 179{
185 struct icmp6hdr _ih, *icmp6h; 180 const struct icmp6hdr *icmp6h;
181 struct icmp6hdr _ih;
186 182
187 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); 183 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
188 if (icmp6h == NULL) { 184 if (icmp6h == NULL) {
@@ -192,7 +188,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
192 return -NF_ACCEPT; 188 return -NF_ACCEPT;
193 } 189 }
194 190
195 if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING && 191 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
196 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { 192 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
197 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, 193 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
198 "nf_ct_icmpv6: ICMPv6 checksum failed\n"); 194 "nf_ct_icmpv6: ICMPv6 checksum failed\n");
@@ -213,12 +209,9 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
213static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, 209static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
214 const struct nf_conntrack_tuple *t) 210 const struct nf_conntrack_tuple *t)
215{ 211{
216 NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), 212 NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
217 &t->src.u.icmp.id); 213 NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
218 NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), 214 NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
219 &t->dst.u.icmp.type);
220 NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
221 &t->dst.u.icmp.code);
222 215
223 return 0; 216 return 0;
224 217
@@ -240,12 +233,9 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
240 || !tb[CTA_PROTO_ICMPV6_ID]) 233 || !tb[CTA_PROTO_ICMPV6_ID])
241 return -EINVAL; 234 return -EINVAL;
242 235
243 tuple->dst.u.icmp.type = 236 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
244 *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]); 237 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
245 tuple->dst.u.icmp.code = 238 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
246 *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]);
247 tuple->src.u.icmp.id =
248 *(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]);
249 239
250 if (tuple->dst.u.icmp.type < 128 240 if (tuple->dst.u.icmp.type < 128
251 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) 241 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
@@ -280,7 +270,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
280 .pkt_to_tuple = icmpv6_pkt_to_tuple, 270 .pkt_to_tuple = icmpv6_pkt_to_tuple,
281 .invert_tuple = icmpv6_invert_tuple, 271 .invert_tuple = icmpv6_invert_tuple,
282 .print_tuple = icmpv6_print_tuple, 272 .print_tuple = icmpv6_print_tuple,
283 .print_conntrack = icmpv6_print_conntrack,
284 .packet = icmpv6_packet, 273 .packet = icmpv6_packet,
285 .new = icmpv6_new, 274 .new = icmpv6_new,
286 .error = icmpv6_error, 275 .error = icmpv6_error,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e170c67c47a5..2a0d698b24d5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -39,6 +39,7 @@
39#include <net/rawv6.h> 39#include <net/rawv6.h>
40#include <net/ndisc.h> 40#include <net/ndisc.h>
41#include <net/addrconf.h> 41#include <net/addrconf.h>
42#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
42#include <linux/sysctl.h> 43#include <linux/sysctl.h>
43#include <linux/netfilter.h> 44#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h> 45#include <linux/netfilter_ipv6.h>
@@ -70,14 +71,37 @@ struct nf_ct_frag6_queue
70 __u16 nhoffset; 71 __u16 nhoffset;
71}; 72};
72 73
73struct inet_frags_ctl nf_frags_ctl __read_mostly = {
74 .high_thresh = 256 * 1024,
75 .low_thresh = 192 * 1024,
76 .timeout = IPV6_FRAG_TIMEOUT,
77 .secret_interval = 10 * 60 * HZ,
78};
79
80static struct inet_frags nf_frags; 74static struct inet_frags nf_frags;
75static struct netns_frags nf_init_frags;
76
77#ifdef CONFIG_SYSCTL
78struct ctl_table nf_ct_ipv6_sysctl_table[] = {
79 {
80 .procname = "nf_conntrack_frag6_timeout",
81 .data = &nf_init_frags.timeout,
82 .maxlen = sizeof(unsigned int),
83 .mode = 0644,
84 .proc_handler = &proc_dointvec_jiffies,
85 },
86 {
87 .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
88 .procname = "nf_conntrack_frag6_low_thresh",
89 .data = &nf_init_frags.low_thresh,
90 .maxlen = sizeof(unsigned int),
91 .mode = 0644,
92 .proc_handler = &proc_dointvec,
93 },
94 {
95 .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
96 .procname = "nf_conntrack_frag6_high_thresh",
97 .data = &nf_init_frags.high_thresh,
98 .maxlen = sizeof(unsigned int),
99 .mode = 0644,
100 .proc_handler = &proc_dointvec,
101 },
102 { .ctl_name = 0 }
103};
104#endif
81 105
82static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, 106static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
83 struct in6_addr *daddr) 107 struct in6_addr *daddr)
@@ -125,7 +149,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
125{ 149{
126 if (work) 150 if (work)
127 *work -= skb->truesize; 151 *work -= skb->truesize;
128 atomic_sub(skb->truesize, &nf_frags.mem); 152 atomic_sub(skb->truesize, &nf_init_frags.mem);
129 nf_skb_free(skb); 153 nf_skb_free(skb);
130 kfree_skb(skb); 154 kfree_skb(skb);
131} 155}
@@ -147,7 +171,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
147 171
148static void nf_ct_frag6_evictor(void) 172static void nf_ct_frag6_evictor(void)
149{ 173{
150 inet_frag_evictor(&nf_frags); 174 inet_frag_evictor(&nf_init_frags, &nf_frags);
151} 175}
152 176
153static void nf_ct_frag6_expire(unsigned long data) 177static void nf_ct_frag6_expire(unsigned long data)
@@ -183,7 +207,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
183 arg.dst = dst; 207 arg.dst = dst;
184 hash = ip6qhashfn(id, src, dst); 208 hash = ip6qhashfn(id, src, dst);
185 209
186 q = inet_frag_find(&nf_frags, &arg, hash); 210 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
187 if (q == NULL) 211 if (q == NULL)
188 goto oom; 212 goto oom;
189 213
@@ -352,7 +376,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
352 skb->dev = NULL; 376 skb->dev = NULL;
353 fq->q.stamp = skb->tstamp; 377 fq->q.stamp = skb->tstamp;
354 fq->q.meat += skb->len; 378 fq->q.meat += skb->len;
355 atomic_add(skb->truesize, &nf_frags.mem); 379 atomic_add(skb->truesize, &nf_init_frags.mem);
356 380
357 /* The first fragment. 381 /* The first fragment.
358 * nhoffset is obtained from the first fragment, of course. 382 * nhoffset is obtained from the first fragment, of course.
@@ -362,7 +386,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
362 fq->q.last_in |= FIRST_IN; 386 fq->q.last_in |= FIRST_IN;
363 } 387 }
364 write_lock(&nf_frags.lock); 388 write_lock(&nf_frags.lock);
365 list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); 389 list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
366 write_unlock(&nf_frags.lock); 390 write_unlock(&nf_frags.lock);
367 return 0; 391 return 0;
368 392
@@ -429,7 +453,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
429 clone->ip_summed = head->ip_summed; 453 clone->ip_summed = head->ip_summed;
430 454
431 NFCT_FRAG6_CB(clone)->orig = NULL; 455 NFCT_FRAG6_CB(clone)->orig = NULL;
432 atomic_add(clone->truesize, &nf_frags.mem); 456 atomic_add(clone->truesize, &nf_init_frags.mem);
433 } 457 }
434 458
435 /* We have to remove fragment header from datagram and to relocate 459 /* We have to remove fragment header from datagram and to relocate
@@ -443,7 +467,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
443 skb_shinfo(head)->frag_list = head->next; 467 skb_shinfo(head)->frag_list = head->next;
444 skb_reset_transport_header(head); 468 skb_reset_transport_header(head);
445 skb_push(head, head->data - skb_network_header(head)); 469 skb_push(head, head->data - skb_network_header(head));
446 atomic_sub(head->truesize, &nf_frags.mem); 470 atomic_sub(head->truesize, &nf_init_frags.mem);
447 471
448 for (fp=head->next; fp; fp = fp->next) { 472 for (fp=head->next; fp; fp = fp->next) {
449 head->data_len += fp->len; 473 head->data_len += fp->len;
@@ -453,7 +477,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
453 else if (head->ip_summed == CHECKSUM_COMPLETE) 477 else if (head->ip_summed == CHECKSUM_COMPLETE)
454 head->csum = csum_add(head->csum, fp->csum); 478 head->csum = csum_add(head->csum, fp->csum);
455 head->truesize += fp->truesize; 479 head->truesize += fp->truesize;
456 atomic_sub(fp->truesize, &nf_frags.mem); 480 atomic_sub(fp->truesize, &nf_init_frags.mem);
457 } 481 }
458 482
459 head->next = NULL; 483 head->next = NULL;
@@ -603,7 +627,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
603 goto ret_orig; 627 goto ret_orig;
604 } 628 }
605 629
606 if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) 630 if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
607 nf_ct_frag6_evictor(); 631 nf_ct_frag6_evictor();
608 632
609 fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); 633 fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -657,24 +681,8 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
657 nf_conntrack_put_reasm(skb); 681 nf_conntrack_put_reasm(skb);
658} 682}
659 683
660int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
661{
662 struct sk_buff *s, *s2;
663
664 for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
665
666 s2 = s->next;
667 kfree_skb(s);
668 }
669
670 kfree_skb(skb);
671
672 return 0;
673}
674
675int nf_ct_frag6_init(void) 684int nf_ct_frag6_init(void)
676{ 685{
677 nf_frags.ctl = &nf_frags_ctl;
678 nf_frags.hashfn = nf_hashfn; 686 nf_frags.hashfn = nf_hashfn;
679 nf_frags.constructor = ip6_frag_init; 687 nf_frags.constructor = ip6_frag_init;
680 nf_frags.destructor = NULL; 688 nf_frags.destructor = NULL;
@@ -682,6 +690,11 @@ int nf_ct_frag6_init(void)
682 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); 690 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
683 nf_frags.match = ip6_frag_match; 691 nf_frags.match = ip6_frag_match;
684 nf_frags.frag_expire = nf_ct_frag6_expire; 692 nf_frags.frag_expire = nf_ct_frag6_expire;
693 nf_frags.secret_interval = 10 * 60 * HZ;
694 nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
695 nf_init_frags.high_thresh = 256 * 1024;
696 nf_init_frags.low_thresh = 192 * 1024;
697 inet_frags_init_net(&nf_init_frags);
685 inet_frags_init(&nf_frags); 698 inet_frags_init(&nf_frags);
686 699
687 return 0; 700 return 0;
@@ -691,6 +704,6 @@ void nf_ct_frag6_cleanup(void)
691{ 704{
692 inet_frags_fini(&nf_frags); 705 inet_frags_fini(&nf_frags);
693 706
694 nf_frags_ctl.low_thresh = 0; 707 nf_init_frags.low_thresh = 0;
695 nf_ct_frag6_evictor(); 708 nf_ct_frag6_evictor();
696} 709}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index be526ad92543..35e502a72495 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -27,34 +27,24 @@
27#include <net/ip.h> 27#include <net/ip.h>
28#include <net/sock.h> 28#include <net/sock.h>
29#include <net/tcp.h> 29#include <net/tcp.h>
30#include <net/udp.h>
30#include <net/transp_v6.h> 31#include <net/transp_v6.h>
31#include <net/ipv6.h> 32#include <net/ipv6.h>
32 33
33static struct proc_dir_entry *proc_net_devsnmp6; 34static struct proc_dir_entry *proc_net_devsnmp6;
34 35
35static int fold_prot_inuse(struct proto *proto)
36{
37 int res = 0;
38 int cpu;
39
40 for_each_possible_cpu(cpu)
41 res += proto->stats[cpu].inuse;
42
43 return res;
44}
45
46static int sockstat6_seq_show(struct seq_file *seq, void *v) 36static int sockstat6_seq_show(struct seq_file *seq, void *v)
47{ 37{
48 seq_printf(seq, "TCP6: inuse %d\n", 38 seq_printf(seq, "TCP6: inuse %d\n",
49 fold_prot_inuse(&tcpv6_prot)); 39 sock_prot_inuse_get(&tcpv6_prot));
50 seq_printf(seq, "UDP6: inuse %d\n", 40 seq_printf(seq, "UDP6: inuse %d\n",
51 fold_prot_inuse(&udpv6_prot)); 41 sock_prot_inuse_get(&udpv6_prot));
52 seq_printf(seq, "UDPLITE6: inuse %d\n", 42 seq_printf(seq, "UDPLITE6: inuse %d\n",
53 fold_prot_inuse(&udplitev6_prot)); 43 sock_prot_inuse_get(&udplitev6_prot));
54 seq_printf(seq, "RAW6: inuse %d\n", 44 seq_printf(seq, "RAW6: inuse %d\n",
55 fold_prot_inuse(&rawv6_prot)); 45 sock_prot_inuse_get(&rawv6_prot));
56 seq_printf(seq, "FRAG6: inuse %d memory %d\n", 46 seq_printf(seq, "FRAG6: inuse %d memory %d\n",
57 ip6_frag_nqueues(), ip6_frag_mem()); 47 ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net));
58 return 0; 48 return 0;
59} 49}
60 50
@@ -99,7 +89,7 @@ static char *icmp6type2name[256] = {
99 [ICMPV6_PKT_TOOBIG] = "PktTooBigs", 89 [ICMPV6_PKT_TOOBIG] = "PktTooBigs",
100 [ICMPV6_TIME_EXCEED] = "TimeExcds", 90 [ICMPV6_TIME_EXCEED] = "TimeExcds",
101 [ICMPV6_PARAMPROB] = "ParmProblems", 91 [ICMPV6_PARAMPROB] = "ParmProblems",
102 [ICMPV6_ECHO_REQUEST] = "EchoRequest", 92 [ICMPV6_ECHO_REQUEST] = "Echos",
103 [ICMPV6_ECHO_REPLY] = "EchoReplies", 93 [ICMPV6_ECHO_REPLY] = "EchoReplies",
104 [ICMPV6_MGM_QUERY] = "GroupMembQueries", 94 [ICMPV6_MGM_QUERY] = "GroupMembQueries",
105 [ICMPV6_MGM_REPORT] = "GroupMembResponses", 95 [ICMPV6_MGM_REPORT] = "GroupMembResponses",
@@ -109,7 +99,7 @@ static char *icmp6type2name[256] = {
109 [NDISC_ROUTER_SOLICITATION] = "RouterSolicits", 99 [NDISC_ROUTER_SOLICITATION] = "RouterSolicits",
110 [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements", 100 [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements",
111 [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits", 101 [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits",
112 [NDISC_REDIRECT] = "NeighborRedirects", 102 [NDISC_REDIRECT] = "Redirects",
113}; 103};
114 104
115 105
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ca24ef19cd8f..8897ccf8086a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -54,39 +54,31 @@
54#include <net/mip6.h> 54#include <net/mip6.h>
55#endif 55#endif
56 56
57#include <net/raw.h>
57#include <net/rawv6.h> 58#include <net/rawv6.h>
58#include <net/xfrm.h> 59#include <net/xfrm.h>
59 60
60#include <linux/proc_fs.h> 61#include <linux/proc_fs.h>
61#include <linux/seq_file.h> 62#include <linux/seq_file.h>
62 63
63struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; 64static struct raw_hashinfo raw_v6_hashinfo = {
64DEFINE_RWLOCK(raw_v6_lock); 65 .lock = __RW_LOCK_UNLOCKED(),
66};
65 67
66static void raw_v6_hash(struct sock *sk) 68static void raw_v6_hash(struct sock *sk)
67{ 69{
68 struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num & 70 raw_hash_sk(sk, &raw_v6_hashinfo);
69 (RAWV6_HTABLE_SIZE - 1)];
70
71 write_lock_bh(&raw_v6_lock);
72 sk_add_node(sk, list);
73 sock_prot_inc_use(sk->sk_prot);
74 write_unlock_bh(&raw_v6_lock);
75} 71}
76 72
77static void raw_v6_unhash(struct sock *sk) 73static void raw_v6_unhash(struct sock *sk)
78{ 74{
79 write_lock_bh(&raw_v6_lock); 75 raw_unhash_sk(sk, &raw_v6_hashinfo);
80 if (sk_del_node_init(sk))
81 sock_prot_dec_use(sk->sk_prot);
82 write_unlock_bh(&raw_v6_lock);
83} 76}
84 77
85 78
86/* Grumble... icmp and ip_input want to get at this... */ 79static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
87struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, 80 unsigned short num, struct in6_addr *loc_addr,
88 struct in6_addr *loc_addr, struct in6_addr *rmt_addr, 81 struct in6_addr *rmt_addr, int dif)
89 int dif)
90{ 82{
91 struct hlist_node *node; 83 struct hlist_node *node;
92 int is_multicast = ipv6_addr_is_multicast(loc_addr); 84 int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -95,6 +87,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
95 if (inet_sk(sk)->num == num) { 87 if (inet_sk(sk)->num == num) {
96 struct ipv6_pinfo *np = inet6_sk(sk); 88 struct ipv6_pinfo *np = inet6_sk(sk);
97 89
90 if (sk->sk_net != net)
91 continue;
92
98 if (!ipv6_addr_any(&np->daddr) && 93 if (!ipv6_addr_any(&np->daddr) &&
99 !ipv6_addr_equal(&np->daddr, rmt_addr)) 94 !ipv6_addr_equal(&np->daddr, rmt_addr))
100 continue; 95 continue;
@@ -167,21 +162,22 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
167 * 162 *
168 * Caller owns SKB so we must make clones. 163 * Caller owns SKB so we must make clones.
169 */ 164 */
170int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 165static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
171{ 166{
172 struct in6_addr *saddr; 167 struct in6_addr *saddr;
173 struct in6_addr *daddr; 168 struct in6_addr *daddr;
174 struct sock *sk; 169 struct sock *sk;
175 int delivered = 0; 170 int delivered = 0;
176 __u8 hash; 171 __u8 hash;
172 struct net *net;
177 173
178 saddr = &ipv6_hdr(skb)->saddr; 174 saddr = &ipv6_hdr(skb)->saddr;
179 daddr = saddr + 1; 175 daddr = saddr + 1;
180 176
181 hash = nexthdr & (MAX_INET_PROTOS - 1); 177 hash = nexthdr & (MAX_INET_PROTOS - 1);
182 178
183 read_lock(&raw_v6_lock); 179 read_lock(&raw_v6_hashinfo.lock);
184 sk = sk_head(&raw_v6_htable[hash]); 180 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
185 181
186 /* 182 /*
187 * The first socket found will be delivered after 183 * The first socket found will be delivered after
@@ -191,7 +187,8 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
191 if (sk == NULL) 187 if (sk == NULL)
192 goto out; 188 goto out;
193 189
194 sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); 190 net = skb->dev->nd_net;
191 sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
195 192
196 while (sk) { 193 while (sk) {
197 int filtered; 194 int filtered;
@@ -234,14 +231,25 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
234 rawv6_rcv(sk, clone); 231 rawv6_rcv(sk, clone);
235 } 232 }
236 } 233 }
237 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, 234 sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
238 IP6CB(skb)->iif); 235 IP6CB(skb)->iif);
239 } 236 }
240out: 237out:
241 read_unlock(&raw_v6_lock); 238 read_unlock(&raw_v6_hashinfo.lock);
242 return delivered; 239 return delivered;
243} 240}
244 241
242int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
243{
244 struct sock *raw_sk;
245
246 raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
247 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
248 raw_sk = NULL;
249
250 return raw_sk != NULL;
251}
252
245/* This cleans up af_inet6 a bit. -DaveM */ 253/* This cleans up af_inet6 a bit. -DaveM */
246static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 254static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
247{ 255{
@@ -283,7 +291,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
283 if (!sk->sk_bound_dev_if) 291 if (!sk->sk_bound_dev_if)
284 goto out; 292 goto out;
285 293
286 dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); 294 dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if);
287 if (!dev) { 295 if (!dev) {
288 err = -ENODEV; 296 err = -ENODEV;
289 goto out; 297 goto out;
@@ -296,7 +304,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
296 v4addr = LOOPBACK4_IPV6; 304 v4addr = LOOPBACK4_IPV6;
297 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 305 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
298 err = -EADDRNOTAVAIL; 306 err = -EADDRNOTAVAIL;
299 if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { 307 if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr,
308 dev, 0)) {
300 if (dev) 309 if (dev)
301 dev_put(dev); 310 dev_put(dev);
302 goto out; 311 goto out;
@@ -316,7 +325,7 @@ out:
316 return err; 325 return err;
317} 326}
318 327
319void rawv6_err(struct sock *sk, struct sk_buff *skb, 328static void rawv6_err(struct sock *sk, struct sk_buff *skb,
320 struct inet6_skb_parm *opt, 329 struct inet6_skb_parm *opt,
321 int type, int code, int offset, __be32 info) 330 int type, int code, int offset, __be32 info)
322{ 331{
@@ -350,18 +359,45 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
350 } 359 }
351} 360}
352 361
362void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
363 int type, int code, int inner_offset, __be32 info)
364{
365 struct sock *sk;
366 int hash;
367 struct in6_addr *saddr, *daddr;
368 struct net *net;
369
370 hash = nexthdr & (RAW_HTABLE_SIZE - 1);
371
372 read_lock(&raw_v6_hashinfo.lock);
373 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
374 if (sk != NULL) {
375 saddr = &ipv6_hdr(skb)->saddr;
376 daddr = &ipv6_hdr(skb)->daddr;
377 net = skb->dev->nd_net;
378
379 while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
380 IP6CB(skb)->iif))) {
381 rawv6_err(sk, skb, NULL, type, code,
382 inner_offset, info);
383 sk = sk_next(sk);
384 }
385 }
386 read_unlock(&raw_v6_hashinfo.lock);
387}
388
353static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 389static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
354{ 390{
355 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 391 if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
356 skb_checksum_complete(skb)) { 392 skb_checksum_complete(skb)) {
357 /* FIXME: increment a raw6 drops counter here */ 393 atomic_inc(&sk->sk_drops);
358 kfree_skb(skb); 394 kfree_skb(skb);
359 return 0; 395 return 0;
360 } 396 }
361 397
362 /* Charge it to the socket. */ 398 /* Charge it to the socket. */
363 if (sock_queue_rcv_skb(sk,skb)<0) { 399 if (sock_queue_rcv_skb(sk,skb)<0) {
364 /* FIXME: increment a raw6 drops counter here */ 400 atomic_inc(&sk->sk_drops);
365 kfree_skb(skb); 401 kfree_skb(skb);
366 return 0; 402 return 0;
367 } 403 }
@@ -382,6 +418,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
382 struct raw6_sock *rp = raw6_sk(sk); 418 struct raw6_sock *rp = raw6_sk(sk);
383 419
384 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 420 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
421 atomic_inc(&sk->sk_drops);
385 kfree_skb(skb); 422 kfree_skb(skb);
386 return NET_RX_DROP; 423 return NET_RX_DROP;
387 } 424 }
@@ -405,7 +442,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
405 442
406 if (inet->hdrincl) { 443 if (inet->hdrincl) {
407 if (skb_checksum_complete(skb)) { 444 if (skb_checksum_complete(skb)) {
408 /* FIXME: increment a raw6 drops counter here */ 445 atomic_inc(&sk->sk_drops);
409 kfree_skb(skb); 446 kfree_skb(skb);
410 return 0; 447 return 0;
411 } 448 }
@@ -496,7 +533,7 @@ csum_copy_err:
496 as some normal condition. 533 as some normal condition.
497 */ 534 */
498 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 535 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
499 /* FIXME: increment a raw6 drops counter here */ 536 atomic_inc(&sk->sk_drops);
500 goto out; 537 goto out;
501} 538}
502 539
@@ -604,6 +641,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
604 skb_reserve(skb, hh_len); 641 skb_reserve(skb, hh_len);
605 642
606 skb->priority = sk->sk_priority; 643 skb->priority = sk->sk_priority;
644 skb->mark = sk->sk_mark;
607 skb->dst = dst_clone(&rt->u.dst); 645 skb->dst = dst_clone(&rt->u.dst);
608 646
609 skb_put(skb, length); 647 skb_put(skb, length);
@@ -618,7 +656,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
618 goto error_fault; 656 goto error_fault;
619 657
620 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); 658 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
621 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 659 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
622 dst_output); 660 dst_output);
623 if (err > 0) 661 if (err > 0)
624 err = np->recverr ? net_xmit_errno(err) : 0; 662 err = np->recverr ? net_xmit_errno(err) : 0;
@@ -730,6 +768,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
730 */ 768 */
731 memset(&fl, 0, sizeof(fl)); 769 memset(&fl, 0, sizeof(fl));
732 770
771 fl.mark = sk->sk_mark;
772
733 if (sin6) { 773 if (sin6) {
734 if (addr_len < SIN6_LEN_RFC2133) 774 if (addr_len < SIN6_LEN_RFC2133)
735 return -EINVAL; 775 return -EINVAL;
@@ -843,7 +883,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
843 if (final_p) 883 if (final_p)
844 ipv6_addr_copy(&fl.fl6_dst, final_p); 884 ipv6_addr_copy(&fl.fl6_dst, final_p);
845 885
846 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 886 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
847 if (err == -EREMOTE) 887 if (err == -EREMOTE)
848 err = ip6_dst_blackhole(sk, &dst, &fl); 888 err = ip6_dst_blackhole(sk, &dst, &fl);
849 if (err < 0) 889 if (err < 0)
@@ -1144,6 +1184,8 @@ static int rawv6_init_sk(struct sock *sk)
1144 return(0); 1184 return(0);
1145} 1185}
1146 1186
1187DEFINE_PROTO_INUSE(rawv6)
1188
1147struct proto rawv6_prot = { 1189struct proto rawv6_prot = {
1148 .name = "RAWv6", 1190 .name = "RAWv6",
1149 .owner = THIS_MODULE, 1191 .owner = THIS_MODULE,
@@ -1166,79 +1208,10 @@ struct proto rawv6_prot = {
1166 .compat_setsockopt = compat_rawv6_setsockopt, 1208 .compat_setsockopt = compat_rawv6_setsockopt,
1167 .compat_getsockopt = compat_rawv6_getsockopt, 1209 .compat_getsockopt = compat_rawv6_getsockopt,
1168#endif 1210#endif
1211 REF_PROTO_INUSE(rawv6)
1169}; 1212};
1170 1213
1171#ifdef CONFIG_PROC_FS 1214#ifdef CONFIG_PROC_FS
1172struct raw6_iter_state {
1173 int bucket;
1174};
1175
1176#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
1177
1178static struct sock *raw6_get_first(struct seq_file *seq)
1179{
1180 struct sock *sk;
1181 struct hlist_node *node;
1182 struct raw6_iter_state* state = raw6_seq_private(seq);
1183
1184 for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
1185 sk_for_each(sk, node, &raw_v6_htable[state->bucket])
1186 if (sk->sk_family == PF_INET6)
1187 goto out;
1188 sk = NULL;
1189out:
1190 return sk;
1191}
1192
1193static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
1194{
1195 struct raw6_iter_state* state = raw6_seq_private(seq);
1196
1197 do {
1198 sk = sk_next(sk);
1199try_again:
1200 ;
1201 } while (sk && sk->sk_family != PF_INET6);
1202
1203 if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
1204 sk = sk_head(&raw_v6_htable[state->bucket]);
1205 goto try_again;
1206 }
1207 return sk;
1208}
1209
1210static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
1211{
1212 struct sock *sk = raw6_get_first(seq);
1213 if (sk)
1214 while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
1215 --pos;
1216 return pos ? NULL : sk;
1217}
1218
1219static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
1220{
1221 read_lock(&raw_v6_lock);
1222 return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1223}
1224
1225static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1226{
1227 struct sock *sk;
1228
1229 if (v == SEQ_START_TOKEN)
1230 sk = raw6_get_first(seq);
1231 else
1232 sk = raw6_get_next(seq, v);
1233 ++*pos;
1234 return sk;
1235}
1236
1237static void raw6_seq_stop(struct seq_file *seq, void *v)
1238{
1239 read_unlock(&raw_v6_lock);
1240}
1241
1242static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 1215static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1243{ 1216{
1244 struct ipv6_pinfo *np = inet6_sk(sp); 1217 struct ipv6_pinfo *np = inet6_sk(sp);
@@ -1251,7 +1224,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1251 srcp = inet_sk(sp)->num; 1224 srcp = inet_sk(sp)->num;
1252 seq_printf(seq, 1225 seq_printf(seq,
1253 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1226 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1254 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", 1227 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
1255 i, 1228 i,
1256 src->s6_addr32[0], src->s6_addr32[1], 1229 src->s6_addr32[0], src->s6_addr32[1],
1257 src->s6_addr32[2], src->s6_addr32[3], srcp, 1230 src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1263,7 +1236,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1263 0, 0L, 0, 1236 0, 0L, 0,
1264 sock_i_uid(sp), 0, 1237 sock_i_uid(sp), 0,
1265 sock_i_ino(sp), 1238 sock_i_ino(sp),
1266 atomic_read(&sp->sk_refcnt), sp); 1239 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1267} 1240}
1268 1241
1269static int raw6_seq_show(struct seq_file *seq, void *v) 1242static int raw6_seq_show(struct seq_file *seq, void *v)
@@ -1274,23 +1247,22 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
1274 "local_address " 1247 "local_address "
1275 "remote_address " 1248 "remote_address "
1276 "st tx_queue rx_queue tr tm->when retrnsmt" 1249 "st tx_queue rx_queue tr tm->when retrnsmt"
1277 " uid timeout inode\n"); 1250 " uid timeout inode drops\n");
1278 else 1251 else
1279 raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket); 1252 raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
1280 return 0; 1253 return 0;
1281} 1254}
1282 1255
1283static const struct seq_operations raw6_seq_ops = { 1256static const struct seq_operations raw6_seq_ops = {
1284 .start = raw6_seq_start, 1257 .start = raw_seq_start,
1285 .next = raw6_seq_next, 1258 .next = raw_seq_next,
1286 .stop = raw6_seq_stop, 1259 .stop = raw_seq_stop,
1287 .show = raw6_seq_show, 1260 .show = raw6_seq_show,
1288}; 1261};
1289 1262
1290static int raw6_seq_open(struct inode *inode, struct file *file) 1263static int raw6_seq_open(struct inode *inode, struct file *file)
1291{ 1264{
1292 return seq_open_private(file, &raw6_seq_ops, 1265 return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
1293 sizeof(struct raw6_iter_state));
1294} 1266}
1295 1267
1296static const struct file_operations raw6_seq_fops = { 1268static const struct file_operations raw6_seq_fops = {
@@ -1298,18 +1270,86 @@ static const struct file_operations raw6_seq_fops = {
1298 .open = raw6_seq_open, 1270 .open = raw6_seq_open,
1299 .read = seq_read, 1271 .read = seq_read,
1300 .llseek = seq_lseek, 1272 .llseek = seq_lseek,
1301 .release = seq_release_private, 1273 .release = seq_release_net,
1302}; 1274};
1303 1275
1304int __init raw6_proc_init(void) 1276static int raw6_init_net(struct net *net)
1305{ 1277{
1306 if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) 1278 if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
1307 return -ENOMEM; 1279 return -ENOMEM;
1280
1308 return 0; 1281 return 0;
1309} 1282}
1310 1283
1284static void raw6_exit_net(struct net *net)
1285{
1286 proc_net_remove(net, "raw6");
1287}
1288
1289static struct pernet_operations raw6_net_ops = {
1290 .init = raw6_init_net,
1291 .exit = raw6_exit_net,
1292};
1293
1294int __init raw6_proc_init(void)
1295{
1296 return register_pernet_subsys(&raw6_net_ops);
1297}
1298
1311void raw6_proc_exit(void) 1299void raw6_proc_exit(void)
1312{ 1300{
1313 proc_net_remove(&init_net, "raw6"); 1301 unregister_pernet_subsys(&raw6_net_ops);
1314} 1302}
1315#endif /* CONFIG_PROC_FS */ 1303#endif /* CONFIG_PROC_FS */
1304
1305/* Same as inet6_dgram_ops, sans udp_poll. */
1306static const struct proto_ops inet6_sockraw_ops = {
1307 .family = PF_INET6,
1308 .owner = THIS_MODULE,
1309 .release = inet6_release,
1310 .bind = inet6_bind,
1311 .connect = inet_dgram_connect, /* ok */
1312 .socketpair = sock_no_socketpair, /* a do nothing */
1313 .accept = sock_no_accept, /* a do nothing */
1314 .getname = inet6_getname,
1315 .poll = datagram_poll, /* ok */
1316 .ioctl = inet6_ioctl, /* must change */
1317 .listen = sock_no_listen, /* ok */
1318 .shutdown = inet_shutdown, /* ok */
1319 .setsockopt = sock_common_setsockopt, /* ok */
1320 .getsockopt = sock_common_getsockopt, /* ok */
1321 .sendmsg = inet_sendmsg, /* ok */
1322 .recvmsg = sock_common_recvmsg, /* ok */
1323 .mmap = sock_no_mmap,
1324 .sendpage = sock_no_sendpage,
1325#ifdef CONFIG_COMPAT
1326 .compat_setsockopt = compat_sock_common_setsockopt,
1327 .compat_getsockopt = compat_sock_common_getsockopt,
1328#endif
1329};
1330
1331static struct inet_protosw rawv6_protosw = {
1332 .type = SOCK_RAW,
1333 .protocol = IPPROTO_IP, /* wild card */
1334 .prot = &rawv6_prot,
1335 .ops = &inet6_sockraw_ops,
1336 .capability = CAP_NET_RAW,
1337 .no_check = UDP_CSUM_DEFAULT,
1338 .flags = INET_PROTOSW_REUSE,
1339};
1340
1341int __init rawv6_init(void)
1342{
1343 int ret;
1344
1345 ret = inet6_register_protosw(&rawv6_protosw);
1346 if (ret)
1347 goto out;
1348out:
1349 return ret;
1350}
1351
1352void rawv6_exit(void)
1353{
1354 inet6_unregister_protosw(&rawv6_protosw);
1355}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 76c88a93b9b5..f936d045a39d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -82,23 +82,16 @@ struct frag_queue
82 __u16 nhoffset; 82 __u16 nhoffset;
83}; 83};
84 84
85struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
86 .high_thresh = 256 * 1024,
87 .low_thresh = 192 * 1024,
88 .timeout = IPV6_FRAG_TIMEOUT,
89 .secret_interval = 10 * 60 * HZ,
90};
91
92static struct inet_frags ip6_frags; 85static struct inet_frags ip6_frags;
93 86
94int ip6_frag_nqueues(void) 87int ip6_frag_nqueues(struct net *net)
95{ 88{
96 return ip6_frags.nqueues; 89 return net->ipv6.frags.nqueues;
97} 90}
98 91
99int ip6_frag_mem(void) 92int ip6_frag_mem(struct net *net)
100{ 93{
101 return atomic_read(&ip6_frags.mem); 94 return atomic_read(&net->ipv6.frags.mem);
102} 95}
103 96
104static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 97static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
@@ -156,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
156EXPORT_SYMBOL(ip6_frag_match); 149EXPORT_SYMBOL(ip6_frag_match);
157 150
158/* Memory Tracking Functions. */ 151/* Memory Tracking Functions. */
159static inline void frag_kfree_skb(struct sk_buff *skb, int *work) 152static inline void frag_kfree_skb(struct netns_frags *nf,
153 struct sk_buff *skb, int *work)
160{ 154{
161 if (work) 155 if (work)
162 *work -= skb->truesize; 156 *work -= skb->truesize;
163 atomic_sub(skb->truesize, &ip6_frags.mem); 157 atomic_sub(skb->truesize, &nf->mem);
164 kfree_skb(skb); 158 kfree_skb(skb);
165} 159}
166 160
@@ -190,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq)
190 inet_frag_kill(&fq->q, &ip6_frags); 184 inet_frag_kill(&fq->q, &ip6_frags);
191} 185}
192 186
193static void ip6_evictor(struct inet6_dev *idev) 187static void ip6_evictor(struct net *net, struct inet6_dev *idev)
194{ 188{
195 int evicted; 189 int evicted;
196 190
197 evicted = inet_frag_evictor(&ip6_frags); 191 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
198 if (evicted) 192 if (evicted)
199 IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); 193 IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
200} 194}
@@ -241,7 +235,7 @@ out:
241} 235}
242 236
243static __inline__ struct frag_queue * 237static __inline__ struct frag_queue *
244fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, 238fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
245 struct inet6_dev *idev) 239 struct inet6_dev *idev)
246{ 240{
247 struct inet_frag_queue *q; 241 struct inet_frag_queue *q;
@@ -253,7 +247,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
253 arg.dst = dst; 247 arg.dst = dst;
254 hash = ip6qhashfn(id, src, dst); 248 hash = ip6qhashfn(id, src, dst);
255 249
256 q = inet_frag_find(&ip6_frags, &arg, hash); 250 q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
257 if (q == NULL) 251 if (q == NULL)
258 goto oom; 252 goto oom;
259 253
@@ -396,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
396 fq->q.fragments = next; 390 fq->q.fragments = next;
397 391
398 fq->q.meat -= free_it->len; 392 fq->q.meat -= free_it->len;
399 frag_kfree_skb(free_it, NULL); 393 frag_kfree_skb(fq->q.net, free_it, NULL);
400 } 394 }
401 } 395 }
402 396
@@ -416,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
416 } 410 }
417 fq->q.stamp = skb->tstamp; 411 fq->q.stamp = skb->tstamp;
418 fq->q.meat += skb->len; 412 fq->q.meat += skb->len;
419 atomic_add(skb->truesize, &ip6_frags.mem); 413 atomic_add(skb->truesize, &fq->q.net->mem);
420 414
421 /* The first fragment. 415 /* The first fragment.
422 * nhoffset is obtained from the first fragment, of course. 416 * nhoffset is obtained from the first fragment, of course.
@@ -430,7 +424,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
430 return ip6_frag_reasm(fq, prev, dev); 424 return ip6_frag_reasm(fq, prev, dev);
431 425
432 write_lock(&ip6_frags.lock); 426 write_lock(&ip6_frags.lock);
433 list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); 427 list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
434 write_unlock(&ip6_frags.lock); 428 write_unlock(&ip6_frags.lock);
435 return -1; 429 return -1;
436 430
@@ -510,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
510 head->len -= clone->len; 504 head->len -= clone->len;
511 clone->csum = 0; 505 clone->csum = 0;
512 clone->ip_summed = head->ip_summed; 506 clone->ip_summed = head->ip_summed;
513 atomic_add(clone->truesize, &ip6_frags.mem); 507 atomic_add(clone->truesize, &fq->q.net->mem);
514 } 508 }
515 509
516 /* We have to remove fragment header from datagram and to relocate 510 /* We have to remove fragment header from datagram and to relocate
@@ -525,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
525 skb_shinfo(head)->frag_list = head->next; 519 skb_shinfo(head)->frag_list = head->next;
526 skb_reset_transport_header(head); 520 skb_reset_transport_header(head);
527 skb_push(head, head->data - skb_network_header(head)); 521 skb_push(head, head->data - skb_network_header(head));
528 atomic_sub(head->truesize, &ip6_frags.mem); 522 atomic_sub(head->truesize, &fq->q.net->mem);
529 523
530 for (fp=head->next; fp; fp = fp->next) { 524 for (fp=head->next; fp; fp = fp->next) {
531 head->data_len += fp->len; 525 head->data_len += fp->len;
@@ -535,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
535 else if (head->ip_summed == CHECKSUM_COMPLETE) 529 else if (head->ip_summed == CHECKSUM_COMPLETE)
536 head->csum = csum_add(head->csum, fp->csum); 530 head->csum = csum_add(head->csum, fp->csum);
537 head->truesize += fp->truesize; 531 head->truesize += fp->truesize;
538 atomic_sub(fp->truesize, &ip6_frags.mem); 532 atomic_sub(fp->truesize, &fq->q.net->mem);
539 } 533 }
540 534
541 head->next = NULL; 535 head->next = NULL;
@@ -575,6 +569,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
575 struct frag_hdr *fhdr; 569 struct frag_hdr *fhdr;
576 struct frag_queue *fq; 570 struct frag_queue *fq;
577 struct ipv6hdr *hdr = ipv6_hdr(skb); 571 struct ipv6hdr *hdr = ipv6_hdr(skb);
572 struct net *net;
578 573
579 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); 574 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
580 575
@@ -605,10 +600,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
605 return 1; 600 return 1;
606 } 601 }
607 602
608 if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) 603 net = skb->dev->nd_net;
609 ip6_evictor(ip6_dst_idev(skb->dst)); 604 if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
605 ip6_evictor(net, ip6_dst_idev(skb->dst));
610 606
611 if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, 607 if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
612 ip6_dst_idev(skb->dst))) != NULL) { 608 ip6_dst_idev(skb->dst))) != NULL) {
613 int ret; 609 int ret;
614 610
@@ -632,12 +628,127 @@ static struct inet6_protocol frag_protocol =
632 .flags = INET6_PROTO_NOPOLICY, 628 .flags = INET6_PROTO_NOPOLICY,
633}; 629};
634 630
635void __init ipv6_frag_init(void) 631#ifdef CONFIG_SYSCTL
632static struct ctl_table ip6_frags_ctl_table[] = {
633 {
634 .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
635 .procname = "ip6frag_high_thresh",
636 .data = &init_net.ipv6.frags.high_thresh,
637 .maxlen = sizeof(int),
638 .mode = 0644,
639 .proc_handler = &proc_dointvec
640 },
641 {
642 .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
643 .procname = "ip6frag_low_thresh",
644 .data = &init_net.ipv6.frags.low_thresh,
645 .maxlen = sizeof(int),
646 .mode = 0644,
647 .proc_handler = &proc_dointvec
648 },
649 {
650 .ctl_name = NET_IPV6_IP6FRAG_TIME,
651 .procname = "ip6frag_time",
652 .data = &init_net.ipv6.frags.timeout,
653 .maxlen = sizeof(int),
654 .mode = 0644,
655 .proc_handler = &proc_dointvec_jiffies,
656 .strategy = &sysctl_jiffies,
657 },
658 {
659 .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
660 .procname = "ip6frag_secret_interval",
661 .data = &ip6_frags.secret_interval,
662 .maxlen = sizeof(int),
663 .mode = 0644,
664 .proc_handler = &proc_dointvec_jiffies,
665 .strategy = &sysctl_jiffies
666 },
667 { }
668};
669
670static int ip6_frags_sysctl_register(struct net *net)
636{ 671{
637 if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) 672 struct ctl_table *table;
638 printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); 673 struct ctl_table_header *hdr;
674
675 table = ip6_frags_ctl_table;
676 if (net != &init_net) {
677 table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
678 if (table == NULL)
679 goto err_alloc;
680
681 table[0].data = &net->ipv6.frags.high_thresh;
682 table[1].data = &net->ipv6.frags.low_thresh;
683 table[2].data = &net->ipv6.frags.timeout;
684 table[3].mode &= ~0222;
685 }
686
687 hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
688 if (hdr == NULL)
689 goto err_reg;
690
691 net->ipv6.sysctl.frags_hdr = hdr;
692 return 0;
693
694err_reg:
695 if (net != &init_net)
696 kfree(table);
697err_alloc:
698 return -ENOMEM;
699}
700
701static void ip6_frags_sysctl_unregister(struct net *net)
702{
703 struct ctl_table *table;
704
705 table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
706 unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
707 kfree(table);
708}
709#else
710static inline int ip6_frags_sysctl_register(struct net *net)
711{
712 return 0;
713}
714
715static inline void ip6_frags_sysctl_unregister(struct net *net)
716{
717}
718#endif
719
720static int ipv6_frags_init_net(struct net *net)
721{
722 net->ipv6.frags.high_thresh = 256 * 1024;
723 net->ipv6.frags.low_thresh = 192 * 1024;
724 net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
725
726 inet_frags_init_net(&net->ipv6.frags);
727
728 return ip6_frags_sysctl_register(net);
729}
730
731static void ipv6_frags_exit_net(struct net *net)
732{
733 ip6_frags_sysctl_unregister(net);
734 inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
735}
736
737static struct pernet_operations ip6_frags_ops = {
738 .init = ipv6_frags_init_net,
739 .exit = ipv6_frags_exit_net,
740};
741
742int __init ipv6_frag_init(void)
743{
744 int ret;
745
746 ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
747 if (ret)
748 goto out;
749
750 register_pernet_subsys(&ip6_frags_ops);
639 751
640 ip6_frags.ctl = &ip6_frags_ctl;
641 ip6_frags.hashfn = ip6_hashfn; 752 ip6_frags.hashfn = ip6_hashfn;
642 ip6_frags.constructor = ip6_frag_init; 753 ip6_frags.constructor = ip6_frag_init;
643 ip6_frags.destructor = NULL; 754 ip6_frags.destructor = NULL;
@@ -645,5 +756,15 @@ void __init ipv6_frag_init(void)
645 ip6_frags.qsize = sizeof(struct frag_queue); 756 ip6_frags.qsize = sizeof(struct frag_queue);
646 ip6_frags.match = ip6_frag_match; 757 ip6_frags.match = ip6_frag_match;
647 ip6_frags.frag_expire = ip6_frag_expire; 758 ip6_frags.frag_expire = ip6_frag_expire;
759 ip6_frags.secret_interval = 10 * 60 * HZ;
648 inet_frags_init(&ip6_frags); 760 inet_frags_init(&ip6_frags);
761out:
762 return ret;
763}
764
765void ipv6_frag_exit(void)
766{
767 inet_frags_fini(&ip6_frags);
768 unregister_pernet_subsys(&ip6_frags_ops);
769 inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
649} 770}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 95f8e4a62f68..513f72e3db0d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -38,12 +38,8 @@
38#include <linux/in6.h> 38#include <linux/in6.h>
39#include <linux/init.h> 39#include <linux/init.h>
40#include <linux/if_arp.h> 40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
44#include <linux/seq_file.h> 42#include <linux/seq_file.h>
45#endif
46
47#include <net/net_namespace.h> 43#include <net/net_namespace.h>
48#include <net/snmp.h> 44#include <net/snmp.h>
49#include <net/ipv6.h> 45#include <net/ipv6.h>
@@ -77,21 +73,13 @@
77 73
78#define CLONE_OFFLINK_ROUTE 0 74#define CLONE_OFFLINK_ROUTE 0
79 75
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *); 78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *); 79static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *, 80static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how); 81 struct net_device *dev, int how);
94static int ip6_dst_gc(void); 82static int ip6_dst_gc(struct dst_ops *ops);
95 83
96static int ip6_pkt_discard(struct sk_buff *skb); 84static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb); 85static int ip6_pkt_discard_out(struct sk_buff *skb);
@@ -117,7 +105,9 @@ static struct dst_ops ip6_dst_ops = {
117 .negative_advice = ip6_negative_advice, 105 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure, 106 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu, 107 .update_pmtu = ip6_rt_update_pmtu,
108 .local_out = ip6_local_out,
120 .entry_size = sizeof(struct rt6_info), 109 .entry_size = sizeof(struct rt6_info),
110 .entries = ATOMIC_INIT(0),
121}; 111};
122 112
123static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 113static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -131,6 +121,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
131 .check = ip6_dst_check, 121 .check = ip6_dst_check,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu, 122 .update_pmtu = ip6_rt_blackhole_update_pmtu,
133 .entry_size = sizeof(struct rt6_info), 123 .entry_size = sizeof(struct rt6_info),
124 .entries = ATOMIC_INIT(0),
134}; 125};
135 126
136struct rt6_info ip6_null_entry = { 127struct rt6_info ip6_null_entry = {
@@ -156,7 +147,6 @@ struct rt6_info ip6_null_entry = {
156 147
157static int ip6_pkt_prohibit(struct sk_buff *skb); 148static int ip6_pkt_prohibit(struct sk_buff *skb);
158static int ip6_pkt_prohibit_out(struct sk_buff *skb); 149static int ip6_pkt_prohibit_out(struct sk_buff *skb);
159static int ip6_pkt_blk_hole(struct sk_buff *skb);
160 150
161struct rt6_info ip6_prohibit_entry = { 151struct rt6_info ip6_prohibit_entry = {
162 .u = { 152 .u = {
@@ -185,8 +175,8 @@ struct rt6_info ip6_blk_hole_entry = {
185 .obsolete = -1, 175 .obsolete = -1,
186 .error = -EINVAL, 176 .error = -EINVAL,
187 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 177 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
188 .input = ip6_pkt_blk_hole, 178 .input = dst_discard,
189 .output = ip6_pkt_blk_hole, 179 .output = dst_discard,
190 .ops = &ip6_dst_ops, 180 .ops = &ip6_dst_ops,
191 .path = (struct dst_entry*)&ip6_blk_hole_entry, 181 .path = (struct dst_entry*)&ip6_blk_hole_entry,
192 } 182 }
@@ -220,9 +210,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
220{ 210{
221 struct rt6_info *rt = (struct rt6_info *)dst; 211 struct rt6_info *rt = (struct rt6_info *)dst;
222 struct inet6_dev *idev = rt->rt6i_idev; 212 struct inet6_dev *idev = rt->rt6i_idev;
213 struct net_device *loopback_dev =
214 dev->nd_net->loopback_dev;
223 215
224 if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { 216 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
225 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); 217 struct inet6_dev *loopback_idev =
218 in6_dev_get(loopback_dev);
226 if (loopback_idev != NULL) { 219 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev; 220 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev); 221 in6_dev_put(idev);
@@ -333,7 +326,7 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
333static inline int rt6_check_neigh(struct rt6_info *rt) 326static inline int rt6_check_neigh(struct rt6_info *rt)
334{ 327{
335 struct neighbour *neigh = rt->rt6i_nexthop; 328 struct neighbour *neigh = rt->rt6i_nexthop;
336 int m = 0; 329 int m;
337 if (rt->rt6i_flags & RTF_NONEXTHOP || 330 if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 !(rt->rt6i_flags & RTF_GATEWAY)) 331 !(rt->rt6i_flags & RTF_GATEWAY))
339 m = 1; 332 m = 1;
@@ -341,10 +334,15 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
341 read_lock_bh(&neigh->lock); 334 read_lock_bh(&neigh->lock);
342 if (neigh->nud_state & NUD_VALID) 335 if (neigh->nud_state & NUD_VALID)
343 m = 2; 336 m = 2;
344 else if (!(neigh->nud_state & NUD_FAILED)) 337#ifdef CONFIG_IPV6_ROUTER_PREF
338 else if (neigh->nud_state & NUD_FAILED)
339 m = 0;
340#endif
341 else
345 m = 1; 342 m = 1;
346 read_unlock_bh(&neigh->lock); 343 read_unlock_bh(&neigh->lock);
347 } 344 } else
345 m = 0;
348 return m; 346 return m;
349} 347}
350 348
@@ -548,12 +546,8 @@ restart:
548 rt = rt6_device_match(rt, fl->oif, flags); 546 rt = rt6_device_match(rt, fl->oif, flags);
549 BACKTRACK(&fl->fl6_src); 547 BACKTRACK(&fl->fl6_src);
550out: 548out:
551 dst_hold(&rt->u.dst); 549 dst_use(&rt->u.dst, jiffies);
552 read_unlock_bh(&table->tb6_lock); 550 read_unlock_bh(&table->tb6_lock);
553
554 rt->u.dst.lastuse = jiffies;
555 rt->u.dst.__use++;
556
557 return rt; 551 return rt;
558 552
559} 553}
@@ -609,7 +603,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
609 603
610int ip6_ins_rt(struct rt6_info *rt) 604int ip6_ins_rt(struct rt6_info *rt)
611{ 605{
612 return __ip6_ins_rt(rt, NULL); 606 struct nl_info info = {
607 .nl_net = &init_net,
608 };
609 return __ip6_ins_rt(rt, &info);
613} 610}
614 611
615static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 612static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
@@ -785,12 +782,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
785 782
786EXPORT_SYMBOL(ip6_route_output); 783EXPORT_SYMBOL(ip6_route_output);
787 784
788static int ip6_blackhole_output(struct sk_buff *skb)
789{
790 kfree_skb(skb);
791 return 0;
792}
793
794int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 785int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795{ 786{
796 struct rt6_info *ort = (struct rt6_info *) *dstp; 787 struct rt6_info *ort = (struct rt6_info *) *dstp;
@@ -803,8 +794,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
803 794
804 atomic_set(&new->__refcnt, 1); 795 atomic_set(&new->__refcnt, 1);
805 new->__use = 1; 796 new->__use = 1;
806 new->input = ip6_blackhole_output; 797 new->input = dst_discard;
807 new->output = ip6_blackhole_output; 798 new->output = dst_discard;
808 799
809 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 800 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810 new->dev = ort->u.dst.dev; 801 new->dev = ort->u.dst.dev;
@@ -899,8 +890,8 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
899{ 890{
900 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 891 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901 892
902 if (mtu < ip6_rt_min_advmss) 893 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
903 mtu = ip6_rt_min_advmss; 894 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
904 895
905 /* 896 /*
906 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 897 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
@@ -994,25 +985,25 @@ int ndisc_dst_gc(int *more)
994 return freed; 985 return freed;
995} 986}
996 987
997static int ip6_dst_gc(void) 988static int ip6_dst_gc(struct dst_ops *ops)
998{ 989{
999 static unsigned expire = 30*HZ; 990 static unsigned expire = 30*HZ;
1000 static unsigned long last_gc; 991 static unsigned long last_gc;
1001 unsigned long now = jiffies; 992 unsigned long now = jiffies;
1002 993
1003 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 994 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1004 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 995 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1005 goto out; 996 goto out;
1006 997
1007 expire++; 998 expire++;
1008 fib6_run_gc(expire); 999 fib6_run_gc(expire);
1009 last_gc = now; 1000 last_gc = now;
1010 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 1001 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1011 expire = ip6_rt_gc_timeout>>1; 1002 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1012 1003
1013out: 1004out:
1014 expire -= expire>>ip6_rt_gc_elasticity; 1005 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1015 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 1006 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1016} 1007}
1017 1008
1018/* Clean host part of a prefix. Not necessary in radix tree, 1009/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1272,7 +1263,10 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1272 1263
1273int ip6_del_rt(struct rt6_info *rt) 1264int ip6_del_rt(struct rt6_info *rt)
1274{ 1265{
1275 return __ip6_del_rt(rt, NULL); 1266 struct nl_info info = {
1267 .nl_net = &init_net,
1268 };
1269 return __ip6_del_rt(rt, &info);
1276} 1270}
1277 1271
1278static int ip6_route_del(struct fib6_config *cfg) 1272static int ip6_route_del(struct fib6_config *cfg)
@@ -1517,7 +1511,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1517 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1511 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1518 if (allfrag) 1512 if (allfrag)
1519 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1513 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1520 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1514 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1521 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1515 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1522 goto out; 1516 goto out;
1523 } 1517 }
@@ -1543,7 +1537,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1543 * which is 10 mins. After 10 mins the decreased pmtu is expired 1537 * which is 10 mins. After 10 mins the decreased pmtu is expired
1544 * and detecting PMTU increase will be automatically happened. 1538 * and detecting PMTU increase will be automatically happened.
1545 */ 1539 */
1546 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1540 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1547 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1541 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1548 1542
1549 ip6_ins_rt(nrt); 1543 ip6_ins_rt(nrt);
@@ -1668,6 +1662,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1668 return rt; 1662 return rt;
1669} 1663}
1670 1664
1665EXPORT_SYMBOL(rt6_get_dflt_router);
1666
1671struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1667struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1672 struct net_device *dev, 1668 struct net_device *dev,
1673 unsigned int pref) 1669 unsigned int pref)
@@ -1769,8 +1765,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1769 * Drop the packet on the floor 1765 * Drop the packet on the floor
1770 */ 1766 */
1771 1767
1772static inline int ip6_pkt_drop(struct sk_buff *skb, int code, 1768static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1773 int ipstats_mib_noroutes)
1774{ 1769{
1775 int type; 1770 int type;
1776 switch (ipstats_mib_noroutes) { 1771 switch (ipstats_mib_noroutes) {
@@ -1814,12 +1809,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1814 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1809 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1815} 1810}
1816 1811
1817static int ip6_pkt_blk_hole(struct sk_buff *skb)
1818{
1819 kfree_skb(skb);
1820 return 0;
1821}
1822
1823#endif 1812#endif
1824 1813
1825/* 1814/*
@@ -1920,7 +1909,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1920 */ 1909 */
1921 if (rt->rt6i_dev == arg->dev && 1910 if (rt->rt6i_dev == arg->dev &&
1922 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1911 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1923 (dst_mtu(&rt->u.dst) > arg->mtu || 1912 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1924 (dst_mtu(&rt->u.dst) < arg->mtu && 1913 (dst_mtu(&rt->u.dst) < arg->mtu &&
1925 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 1914 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1926 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1915 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
@@ -1973,6 +1962,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1973 1962
1974 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 1963 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1975 cfg->fc_nlinfo.nlh = nlh; 1964 cfg->fc_nlinfo.nlh = nlh;
1965 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
1976 1966
1977 if (tb[RTA_GATEWAY]) { 1967 if (tb[RTA_GATEWAY]) {
1978 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 1968 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
@@ -2018,9 +2008,13 @@ errout:
2018 2008
2019static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2009static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2020{ 2010{
2011 struct net *net = skb->sk->sk_net;
2021 struct fib6_config cfg; 2012 struct fib6_config cfg;
2022 int err; 2013 int err;
2023 2014
2015 if (net != &init_net)
2016 return -EINVAL;
2017
2024 err = rtm_to_fib6_config(skb, nlh, &cfg); 2018 err = rtm_to_fib6_config(skb, nlh, &cfg);
2025 if (err < 0) 2019 if (err < 0)
2026 return err; 2020 return err;
@@ -2030,9 +2024,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2030 2024
2031static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2025static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2032{ 2026{
2027 struct net *net = skb->sk->sk_net;
2033 struct fib6_config cfg; 2028 struct fib6_config cfg;
2034 int err; 2029 int err;
2035 2030
2031 if (net != &init_net)
2032 return -EINVAL;
2033
2036 err = rtm_to_fib6_config(skb, nlh, &cfg); 2034 err = rtm_to_fib6_config(skb, nlh, &cfg);
2037 if (err < 0) 2035 if (err < 0)
2038 return err; 2036 return err;
@@ -2167,6 +2165,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2167 2165
2168static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2166static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2169{ 2167{
2168 struct net *net = in_skb->sk->sk_net;
2170 struct nlattr *tb[RTA_MAX+1]; 2169 struct nlattr *tb[RTA_MAX+1];
2171 struct rt6_info *rt; 2170 struct rt6_info *rt;
2172 struct sk_buff *skb; 2171 struct sk_buff *skb;
@@ -2174,6 +2173,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2174 struct flowi fl; 2173 struct flowi fl;
2175 int err, iif = 0; 2174 int err, iif = 0;
2176 2175
2176 if (net != &init_net)
2177 return -EINVAL;
2178
2177 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2179 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2178 if (err < 0) 2180 if (err < 0)
2179 goto errout; 2181 goto errout;
@@ -2233,7 +2235,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2233 goto errout; 2235 goto errout;
2234 } 2236 }
2235 2237
2236 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 2238 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
2237errout: 2239errout:
2238 return err; 2240 return err;
2239} 2241}
@@ -2241,32 +2243,29 @@ errout:
2241void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2243void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2242{ 2244{
2243 struct sk_buff *skb; 2245 struct sk_buff *skb;
2244 u32 pid = 0, seq = 0; 2246 u32 seq;
2245 struct nlmsghdr *nlh = NULL; 2247 int err;
2246 int err = -ENOBUFS; 2248
2247 2249 err = -ENOBUFS;
2248 if (info) { 2250 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2249 pid = info->pid;
2250 nlh = info->nlh;
2251 if (nlh)
2252 seq = nlh->nlmsg_seq;
2253 }
2254 2251
2255 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2252 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2256 if (skb == NULL) 2253 if (skb == NULL)
2257 goto errout; 2254 goto errout;
2258 2255
2259 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); 2256 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2257 event, info->pid, seq, 0, 0);
2260 if (err < 0) { 2258 if (err < 0) {
2261 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2259 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2262 WARN_ON(err == -EMSGSIZE); 2260 WARN_ON(err == -EMSGSIZE);
2263 kfree_skb(skb); 2261 kfree_skb(skb);
2264 goto errout; 2262 goto errout;
2265 } 2263 }
2266 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); 2264 err = rtnl_notify(skb, &init_net, info->pid,
2265 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2267errout: 2266errout:
2268 if (err < 0) 2267 if (err < 0)
2269 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); 2268 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2270} 2269}
2271 2270
2272/* 2271/*
@@ -2288,71 +2287,50 @@ struct rt6_proc_arg
2288 2287
2289static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2288static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2290{ 2289{
2291 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; 2290 struct seq_file *m = p_arg;
2292
2293 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2294 arg->skip++;
2295 return 0;
2296 }
2297
2298 if (arg->len >= arg->length)
2299 return 0;
2300 2291
2301 arg->len += sprintf(arg->buffer + arg->len, 2292 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2302 NIP6_SEQFMT " %02x ", 2293 rt->rt6i_dst.plen);
2303 NIP6(rt->rt6i_dst.addr),
2304 rt->rt6i_dst.plen);
2305 2294
2306#ifdef CONFIG_IPV6_SUBTREES 2295#ifdef CONFIG_IPV6_SUBTREES
2307 arg->len += sprintf(arg->buffer + arg->len, 2296 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2308 NIP6_SEQFMT " %02x ", 2297 rt->rt6i_src.plen);
2309 NIP6(rt->rt6i_src.addr),
2310 rt->rt6i_src.plen);
2311#else 2298#else
2312 arg->len += sprintf(arg->buffer + arg->len, 2299 seq_puts(m, "00000000000000000000000000000000 00 ");
2313 "00000000000000000000000000000000 00 ");
2314#endif 2300#endif
2315 2301
2316 if (rt->rt6i_nexthop) { 2302 if (rt->rt6i_nexthop) {
2317 arg->len += sprintf(arg->buffer + arg->len, 2303 seq_printf(m, NIP6_SEQFMT,
2318 NIP6_SEQFMT, 2304 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2319 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2320 } else { 2305 } else {
2321 arg->len += sprintf(arg->buffer + arg->len, 2306 seq_puts(m, "00000000000000000000000000000000");
2322 "00000000000000000000000000000000");
2323 } 2307 }
2324 arg->len += sprintf(arg->buffer + arg->len, 2308 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2325 " %08x %08x %08x %08x %8s\n", 2309 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2326 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2310 rt->u.dst.__use, rt->rt6i_flags,
2327 rt->u.dst.__use, rt->rt6i_flags, 2311 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2328 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2329 return 0; 2312 return 0;
2330} 2313}
2331 2314
2332static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) 2315static int ipv6_route_show(struct seq_file *m, void *v)
2333{ 2316{
2334 struct rt6_proc_arg arg = { 2317 fib6_clean_all(rt6_info_route, 0, m);
2335 .buffer = buffer, 2318 return 0;
2336 .offset = offset, 2319}
2337 .length = length,
2338 };
2339
2340 fib6_clean_all(rt6_info_route, 0, &arg);
2341
2342 *start = buffer;
2343 if (offset)
2344 *start += offset % RT6_INFO_LEN;
2345
2346 arg.len -= offset % RT6_INFO_LEN;
2347
2348 if (arg.len > length)
2349 arg.len = length;
2350 if (arg.len < 0)
2351 arg.len = 0;
2352 2320
2353 return arg.len; 2321static int ipv6_route_open(struct inode *inode, struct file *file)
2322{
2323 return single_open(file, ipv6_route_show, NULL);
2354} 2324}
2355 2325
2326static const struct file_operations ipv6_route_proc_fops = {
2327 .owner = THIS_MODULE,
2328 .open = ipv6_route_open,
2329 .read = seq_read,
2330 .llseek = seq_lseek,
2331 .release = single_release,
2332};
2333
2356static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2334static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2357{ 2335{
2358 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2336 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
@@ -2377,28 +2355,61 @@ static const struct file_operations rt6_stats_seq_fops = {
2377 .llseek = seq_lseek, 2355 .llseek = seq_lseek,
2378 .release = single_release, 2356 .release = single_release,
2379}; 2357};
2358
2359static int ipv6_route_proc_init(struct net *net)
2360{
2361 int ret = -ENOMEM;
2362 if (!proc_net_fops_create(net, "ipv6_route",
2363 0, &ipv6_route_proc_fops))
2364 goto out;
2365
2366 if (!proc_net_fops_create(net, "rt6_stats",
2367 S_IRUGO, &rt6_stats_seq_fops))
2368 goto out_ipv6_route;
2369
2370 ret = 0;
2371out:
2372 return ret;
2373out_ipv6_route:
2374 proc_net_remove(net, "ipv6_route");
2375 goto out;
2376}
2377
2378static void ipv6_route_proc_fini(struct net *net)
2379{
2380 proc_net_remove(net, "ipv6_route");
2381 proc_net_remove(net, "rt6_stats");
2382}
2383#else
2384static inline int ipv6_route_proc_init(struct net *net)
2385{
2386 return 0;
2387}
2388static inline void ipv6_route_proc_fini(struct net *net)
2389{
2390 return ;
2391}
2380#endif /* CONFIG_PROC_FS */ 2392#endif /* CONFIG_PROC_FS */
2381 2393
2382#ifdef CONFIG_SYSCTL 2394#ifdef CONFIG_SYSCTL
2383 2395
2384static int flush_delay;
2385
2386static 2396static
2387int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2397int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2388 void __user *buffer, size_t *lenp, loff_t *ppos) 2398 void __user *buffer, size_t *lenp, loff_t *ppos)
2389{ 2399{
2400 int delay = init_net.ipv6.sysctl.flush_delay;
2390 if (write) { 2401 if (write) {
2391 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2402 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2392 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); 2403 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
2393 return 0; 2404 return 0;
2394 } else 2405 } else
2395 return -EINVAL; 2406 return -EINVAL;
2396} 2407}
2397 2408
2398ctl_table ipv6_route_table[] = { 2409ctl_table ipv6_route_table_template[] = {
2399 { 2410 {
2400 .procname = "flush", 2411 .procname = "flush",
2401 .data = &flush_delay, 2412 .data = &init_net.ipv6.sysctl.flush_delay,
2402 .maxlen = sizeof(int), 2413 .maxlen = sizeof(int),
2403 .mode = 0200, 2414 .mode = 0200,
2404 .proc_handler = &ipv6_sysctl_rtcache_flush 2415 .proc_handler = &ipv6_sysctl_rtcache_flush
@@ -2414,7 +2425,7 @@ ctl_table ipv6_route_table[] = {
2414 { 2425 {
2415 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2426 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2416 .procname = "max_size", 2427 .procname = "max_size",
2417 .data = &ip6_rt_max_size, 2428 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2418 .maxlen = sizeof(int), 2429 .maxlen = sizeof(int),
2419 .mode = 0644, 2430 .mode = 0644,
2420 .proc_handler = &proc_dointvec, 2431 .proc_handler = &proc_dointvec,
@@ -2422,7 +2433,7 @@ ctl_table ipv6_route_table[] = {
2422 { 2433 {
2423 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2434 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2424 .procname = "gc_min_interval", 2435 .procname = "gc_min_interval",
2425 .data = &ip6_rt_gc_min_interval, 2436 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2426 .maxlen = sizeof(int), 2437 .maxlen = sizeof(int),
2427 .mode = 0644, 2438 .mode = 0644,
2428 .proc_handler = &proc_dointvec_jiffies, 2439 .proc_handler = &proc_dointvec_jiffies,
@@ -2431,7 +2442,7 @@ ctl_table ipv6_route_table[] = {
2431 { 2442 {
2432 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2443 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2433 .procname = "gc_timeout", 2444 .procname = "gc_timeout",
2434 .data = &ip6_rt_gc_timeout, 2445 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2435 .maxlen = sizeof(int), 2446 .maxlen = sizeof(int),
2436 .mode = 0644, 2447 .mode = 0644,
2437 .proc_handler = &proc_dointvec_jiffies, 2448 .proc_handler = &proc_dointvec_jiffies,
@@ -2440,7 +2451,7 @@ ctl_table ipv6_route_table[] = {
2440 { 2451 {
2441 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2452 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2442 .procname = "gc_interval", 2453 .procname = "gc_interval",
2443 .data = &ip6_rt_gc_interval, 2454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2444 .maxlen = sizeof(int), 2455 .maxlen = sizeof(int),
2445 .mode = 0644, 2456 .mode = 0644,
2446 .proc_handler = &proc_dointvec_jiffies, 2457 .proc_handler = &proc_dointvec_jiffies,
@@ -2449,7 +2460,7 @@ ctl_table ipv6_route_table[] = {
2449 { 2460 {
2450 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2461 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2451 .procname = "gc_elasticity", 2462 .procname = "gc_elasticity",
2452 .data = &ip6_rt_gc_elasticity, 2463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2453 .maxlen = sizeof(int), 2464 .maxlen = sizeof(int),
2454 .mode = 0644, 2465 .mode = 0644,
2455 .proc_handler = &proc_dointvec_jiffies, 2466 .proc_handler = &proc_dointvec_jiffies,
@@ -2458,7 +2469,7 @@ ctl_table ipv6_route_table[] = {
2458 { 2469 {
2459 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2470 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2460 .procname = "mtu_expires", 2471 .procname = "mtu_expires",
2461 .data = &ip6_rt_mtu_expires, 2472 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2462 .maxlen = sizeof(int), 2473 .maxlen = sizeof(int),
2463 .mode = 0644, 2474 .mode = 0644,
2464 .proc_handler = &proc_dointvec_jiffies, 2475 .proc_handler = &proc_dointvec_jiffies,
@@ -2467,7 +2478,7 @@ ctl_table ipv6_route_table[] = {
2467 { 2478 {
2468 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2479 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2469 .procname = "min_adv_mss", 2480 .procname = "min_adv_mss",
2470 .data = &ip6_rt_min_advmss, 2481 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2471 .maxlen = sizeof(int), 2482 .maxlen = sizeof(int),
2472 .mode = 0644, 2483 .mode = 0644,
2473 .proc_handler = &proc_dointvec_jiffies, 2484 .proc_handler = &proc_dointvec_jiffies,
@@ -2476,7 +2487,7 @@ ctl_table ipv6_route_table[] = {
2476 { 2487 {
2477 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2488 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2478 .procname = "gc_min_interval_ms", 2489 .procname = "gc_min_interval_ms",
2479 .data = &ip6_rt_gc_min_interval, 2490 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2480 .maxlen = sizeof(int), 2491 .maxlen = sizeof(int),
2481 .mode = 0644, 2492 .mode = 0644,
2482 .proc_handler = &proc_dointvec_ms_jiffies, 2493 .proc_handler = &proc_dointvec_ms_jiffies,
@@ -2485,50 +2496,74 @@ ctl_table ipv6_route_table[] = {
2485 { .ctl_name = 0 } 2496 { .ctl_name = 0 }
2486}; 2497};
2487 2498
2499struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2500{
2501 struct ctl_table *table;
2502
2503 table = kmemdup(ipv6_route_table_template,
2504 sizeof(ipv6_route_table_template),
2505 GFP_KERNEL);
2506 return table;
2507}
2488#endif 2508#endif
2489 2509
2490void __init ip6_route_init(void) 2510int __init ip6_route_init(void)
2491{ 2511{
2492#ifdef CONFIG_PROC_FS 2512 int ret;
2493 struct proc_dir_entry *p; 2513
2494#endif
2495 ip6_dst_ops.kmem_cachep = 2514 ip6_dst_ops.kmem_cachep =
2496 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2515 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2497 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2516 SLAB_HWCACHE_ALIGN, NULL);
2517 if (!ip6_dst_ops.kmem_cachep)
2518 return -ENOMEM;
2519
2498 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; 2520 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2499 2521
2500 fib6_init(); 2522 ret = fib6_init();
2501#ifdef CONFIG_PROC_FS 2523 if (ret)
2502 p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); 2524 goto out_kmem_cache;
2503 if (p)
2504 p->owner = THIS_MODULE;
2505 2525
2506 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2526 ret = ipv6_route_proc_init(&init_net);
2507#endif 2527 if (ret)
2508#ifdef CONFIG_XFRM 2528 goto out_fib6_init;
2509 xfrm6_init(); 2529
2510#endif 2530 ret = xfrm6_init();
2511#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2531 if (ret)
2512 fib6_rules_init(); 2532 goto out_proc_init;
2513#endif
2514 2533
2515 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); 2534 ret = fib6_rules_init();
2516 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); 2535 if (ret)
2517 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); 2536 goto xfrm6_init;
2537
2538 ret = -ENOBUFS;
2539 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2540 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2541 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2542 goto fib6_rules_init;
2543
2544 ret = 0;
2545out:
2546 return ret;
2547
2548fib6_rules_init:
2549 fib6_rules_cleanup();
2550xfrm6_init:
2551 xfrm6_fini();
2552out_proc_init:
2553 ipv6_route_proc_fini(&init_net);
2554out_fib6_init:
2555 rt6_ifdown(NULL);
2556 fib6_gc_cleanup();
2557out_kmem_cache:
2558 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2559 goto out;
2518} 2560}
2519 2561
2520void ip6_route_cleanup(void) 2562void ip6_route_cleanup(void)
2521{ 2563{
2522#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2523 fib6_rules_cleanup(); 2564 fib6_rules_cleanup();
2524#endif 2565 ipv6_route_proc_fini(&init_net);
2525#ifdef CONFIG_PROC_FS
2526 proc_net_remove(&init_net, "ipv6_route");
2527 proc_net_remove(&init_net, "rt6_stats");
2528#endif
2529#ifdef CONFIG_XFRM
2530 xfrm6_fini(); 2566 xfrm6_fini();
2531#endif
2532 rt6_ifdown(NULL); 2567 rt6_ifdown(NULL);
2533 fib6_gc_cleanup(); 2568 fib6_gc_cleanup();
2534 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2569 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 71433d29d884..e77239d02bf5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -16,6 +16,7 @@
16 * Changes: 16 * Changes:
17 * Roger Venning <r.venning@telstra.com>: 6to4 support 17 * Roger Venning <r.venning@telstra.com>: 6to4 support
18 * Nate Thompson <nate@thebog.net>: 6to4 support 18 * Nate Thompson <nate@thebog.net>: 6to4 support
19 * Fred L. Templin <fltemplin@acm.org>: isatap support
19 */ 20 */
20 21
21#include <linux/module.h> 22#include <linux/module.h>
@@ -182,6 +183,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
182 dev->init = ipip6_tunnel_init; 183 dev->init = ipip6_tunnel_init;
183 nt->parms = *parms; 184 nt->parms = *parms;
184 185
186 if (parms->i_flags & SIT_ISATAP)
187 dev->priv_flags |= IFF_ISATAP;
188
185 if (register_netdevice(dev) < 0) { 189 if (register_netdevice(dev) < 0) {
186 free_netdev(dev); 190 free_netdev(dev);
187 goto failed; 191 goto failed;
@@ -364,6 +368,48 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
364 IP6_ECN_set_ce(ipv6_hdr(skb)); 368 IP6_ECN_set_ce(ipv6_hdr(skb));
365} 369}
366 370
371/* ISATAP (RFC4214) - check source address */
372static int
373isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev)
374{
375 struct neighbour *neigh;
376 struct dst_entry *dst;
377 struct rt6_info *rt;
378 struct flowi fl;
379 struct in6_addr *addr6;
380 struct in6_addr rtr;
381 struct ipv6hdr *iph6;
382 int ok = 0;
383
384 /* from onlink default router */
385 ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0);
386 ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr);
387 if ((rt = rt6_get_dflt_router(&rtr, dev))) {
388 dst_release(&rt->u.dst);
389 return 1;
390 }
391
392 iph6 = ipv6_hdr(skb);
393 memset(&fl, 0, sizeof(fl));
394 fl.proto = iph6->nexthdr;
395 ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr);
396 fl.oif = dev->ifindex;
397 security_skb_classify_flow(skb, &fl);
398
399 dst = ip6_route_output(NULL, &fl);
400 if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) {
401
402 addr6 = (struct in6_addr*)&neigh->primary_key;
403
404 /* from correct previous hop */
405 if (ipv6_addr_is_isatap(addr6) &&
406 (addr6->s6_addr32[3] == iph->saddr))
407 ok = 1;
408 }
409 dst_release(dst);
410 return ok;
411}
412
367static int ipip6_rcv(struct sk_buff *skb) 413static int ipip6_rcv(struct sk_buff *skb)
368{ 414{
369 struct iphdr *iph; 415 struct iphdr *iph;
@@ -382,6 +428,14 @@ static int ipip6_rcv(struct sk_buff *skb)
382 IPCB(skb)->flags = 0; 428 IPCB(skb)->flags = 0;
383 skb->protocol = htons(ETH_P_IPV6); 429 skb->protocol = htons(ETH_P_IPV6);
384 skb->pkt_type = PACKET_HOST; 430 skb->pkt_type = PACKET_HOST;
431
432 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
433 !isatap_srcok(skb, iph, tunnel->dev)) {
434 tunnel->stat.rx_errors++;
435 read_unlock(&ipip6_lock);
436 kfree_skb(skb);
437 return 0;
438 }
385 tunnel->stat.rx_packets++; 439 tunnel->stat.rx_packets++;
386 tunnel->stat.rx_bytes += skb->len; 440 tunnel->stat.rx_bytes += skb->len;
387 skb->dev = tunnel->dev; 441 skb->dev = tunnel->dev;
@@ -444,6 +498,29 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
444 if (skb->protocol != htons(ETH_P_IPV6)) 498 if (skb->protocol != htons(ETH_P_IPV6))
445 goto tx_error; 499 goto tx_error;
446 500
501 /* ISATAP (RFC4214) - must come before 6to4 */
502 if (dev->priv_flags & IFF_ISATAP) {
503 struct neighbour *neigh = NULL;
504
505 if (skb->dst)
506 neigh = skb->dst->neighbour;
507
508 if (neigh == NULL) {
509 if (net_ratelimit())
510 printk(KERN_DEBUG "sit: nexthop == NULL\n");
511 goto tx_error;
512 }
513
514 addr6 = (struct in6_addr*)&neigh->primary_key;
515 addr_type = ipv6_addr_type(addr6);
516
517 if ((addr_type & IPV6_ADDR_UNICAST) &&
518 ipv6_addr_is_isatap(addr6))
519 dst = addr6->s6_addr32[3];
520 else
521 goto tx_error;
522 }
523
447 if (!dst) 524 if (!dst)
448 dst = try_6to4(&iph6->daddr); 525 dst = try_6to4(&iph6->daddr);
449 526
@@ -480,7 +557,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
480 .tos = RT_TOS(tos) } }, 557 .tos = RT_TOS(tos) } },
481 .oif = tunnel->parms.link, 558 .oif = tunnel->parms.link,
482 .proto = IPPROTO_IPV6 }; 559 .proto = IPPROTO_IPV6 };
483 if (ip_route_output_key(&rt, &fl)) { 560 if (ip_route_output_key(&init_net, &rt, &fl)) {
484 tunnel->stat.tx_carrier_errors++; 561 tunnel->stat.tx_carrier_errors++;
485 goto tx_error_icmp; 562 goto tx_error_icmp;
486 } 563 }
@@ -592,6 +669,42 @@ tx_error:
592 return 0; 669 return 0;
593} 670}
594 671
672static void ipip6_tunnel_bind_dev(struct net_device *dev)
673{
674 struct net_device *tdev = NULL;
675 struct ip_tunnel *tunnel;
676 struct iphdr *iph;
677
678 tunnel = netdev_priv(dev);
679 iph = &tunnel->parms.iph;
680
681 if (iph->daddr) {
682 struct flowi fl = { .nl_u = { .ip4_u =
683 { .daddr = iph->daddr,
684 .saddr = iph->saddr,
685 .tos = RT_TOS(iph->tos) } },
686 .oif = tunnel->parms.link,
687 .proto = IPPROTO_IPV6 };
688 struct rtable *rt;
689 if (!ip_route_output_key(&init_net, &rt, &fl)) {
690 tdev = rt->u.dst.dev;
691 ip_rt_put(rt);
692 }
693 dev->flags |= IFF_POINTOPOINT;
694 }
695
696 if (!tdev && tunnel->parms.link)
697 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
698
699 if (tdev) {
700 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
701 dev->mtu = tdev->mtu - sizeof(struct iphdr);
702 if (dev->mtu < IPV6_MIN_MTU)
703 dev->mtu = IPV6_MIN_MTU;
704 }
705 dev->iflink = tunnel->parms.link;
706}
707
595static int 708static int
596ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 709ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
597{ 710{
@@ -663,6 +776,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
663 if (cmd == SIOCCHGTUNNEL) { 776 if (cmd == SIOCCHGTUNNEL) {
664 t->parms.iph.ttl = p.iph.ttl; 777 t->parms.iph.ttl = p.iph.ttl;
665 t->parms.iph.tos = p.iph.tos; 778 t->parms.iph.tos = p.iph.tos;
779 if (t->parms.link != p.link) {
780 t->parms.link = p.link;
781 ipip6_tunnel_bind_dev(dev);
782 netdev_state_change(dev);
783 }
666 } 784 }
667 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 785 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
668 err = -EFAULT; 786 err = -EFAULT;
@@ -731,12 +849,9 @@ static void ipip6_tunnel_setup(struct net_device *dev)
731 849
732static int ipip6_tunnel_init(struct net_device *dev) 850static int ipip6_tunnel_init(struct net_device *dev)
733{ 851{
734 struct net_device *tdev = NULL;
735 struct ip_tunnel *tunnel; 852 struct ip_tunnel *tunnel;
736 struct iphdr *iph;
737 853
738 tunnel = netdev_priv(dev); 854 tunnel = netdev_priv(dev);
739 iph = &tunnel->parms.iph;
740 855
741 tunnel->dev = dev; 856 tunnel->dev = dev;
742 strcpy(tunnel->parms.name, dev->name); 857 strcpy(tunnel->parms.name, dev->name);
@@ -744,31 +859,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
744 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 859 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
745 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 860 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
746 861
747 if (iph->daddr) { 862 ipip6_tunnel_bind_dev(dev);
748 struct flowi fl = { .nl_u = { .ip4_u =
749 { .daddr = iph->daddr,
750 .saddr = iph->saddr,
751 .tos = RT_TOS(iph->tos) } },
752 .oif = tunnel->parms.link,
753 .proto = IPPROTO_IPV6 };
754 struct rtable *rt;
755 if (!ip_route_output_key(&rt, &fl)) {
756 tdev = rt->u.dst.dev;
757 ip_rt_put(rt);
758 }
759 dev->flags |= IFF_POINTOPOINT;
760 }
761
762 if (!tdev && tunnel->parms.link)
763 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
764
765 if (tdev) {
766 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
767 dev->mtu = tdev->mtu - sizeof(struct iphdr);
768 if (dev->mtu < IPV6_MIN_MTU)
769 dev->mtu = IPV6_MIN_MTU;
770 }
771 dev->iflink = tunnel->parms.link;
772 863
773 return 0; 864 return 0;
774} 865}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 68bb2548e469..408691b777c2 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,66 +14,30 @@
14#include <net/addrconf.h> 14#include <net/addrconf.h>
15#include <net/inet_frag.h> 15#include <net/inet_frag.h>
16 16
17#ifdef CONFIG_SYSCTL 17static ctl_table ipv6_table_template[] = {
18
19static ctl_table ipv6_table[] = {
20 { 18 {
21 .ctl_name = NET_IPV6_ROUTE, 19 .ctl_name = NET_IPV6_ROUTE,
22 .procname = "route", 20 .procname = "route",
23 .maxlen = 0, 21 .maxlen = 0,
24 .mode = 0555, 22 .mode = 0555,
25 .child = ipv6_route_table 23 .child = ipv6_route_table_template
26 }, 24 },
27 { 25 {
28 .ctl_name = NET_IPV6_ICMP, 26 .ctl_name = NET_IPV6_ICMP,
29 .procname = "icmp", 27 .procname = "icmp",
30 .maxlen = 0, 28 .maxlen = 0,
31 .mode = 0555, 29 .mode = 0555,
32 .child = ipv6_icmp_table 30 .child = ipv6_icmp_table_template
33 }, 31 },
34 { 32 {
35 .ctl_name = NET_IPV6_BINDV6ONLY, 33 .ctl_name = NET_IPV6_BINDV6ONLY,
36 .procname = "bindv6only", 34 .procname = "bindv6only",
37 .data = &sysctl_ipv6_bindv6only, 35 .data = &init_net.ipv6.sysctl.bindv6only,
38 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
39 .mode = 0644, 37 .mode = 0644,
40 .proc_handler = &proc_dointvec 38 .proc_handler = &proc_dointvec
41 }, 39 },
42 { 40 {
43 .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
44 .procname = "ip6frag_high_thresh",
45 .data = &ip6_frags_ctl.high_thresh,
46 .maxlen = sizeof(int),
47 .mode = 0644,
48 .proc_handler = &proc_dointvec
49 },
50 {
51 .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
52 .procname = "ip6frag_low_thresh",
53 .data = &ip6_frags_ctl.low_thresh,
54 .maxlen = sizeof(int),
55 .mode = 0644,
56 .proc_handler = &proc_dointvec
57 },
58 {
59 .ctl_name = NET_IPV6_IP6FRAG_TIME,
60 .procname = "ip6frag_time",
61 .data = &ip6_frags_ctl.timeout,
62 .maxlen = sizeof(int),
63 .mode = 0644,
64 .proc_handler = &proc_dointvec_jiffies,
65 .strategy = &sysctl_jiffies,
66 },
67 {
68 .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
69 .procname = "ip6frag_secret_interval",
70 .data = &ip6_frags_ctl.secret_interval,
71 .maxlen = sizeof(int),
72 .mode = 0644,
73 .proc_handler = &proc_dointvec_jiffies,
74 .strategy = &sysctl_jiffies
75 },
76 {
77 .ctl_name = NET_IPV6_MLD_MAX_MSF, 41 .ctl_name = NET_IPV6_MLD_MAX_MSF,
78 .procname = "mld_max_msf", 42 .procname = "mld_max_msf",
79 .data = &sysctl_mld_max_msf, 43 .data = &sysctl_mld_max_msf,
@@ -84,39 +48,106 @@ static ctl_table ipv6_table[] = {
84 { .ctl_name = 0 } 48 { .ctl_name = 0 }
85}; 49};
86 50
87static struct ctl_table_header *ipv6_sysctl_header; 51struct ctl_path net_ipv6_ctl_path[] = {
88 52 { .procname = "net", .ctl_name = CTL_NET, },
89static ctl_table ipv6_net_table[] = { 53 { .procname = "ipv6", .ctl_name = NET_IPV6, },
90 { 54 { },
91 .ctl_name = NET_IPV6,
92 .procname = "ipv6",
93 .mode = 0555,
94 .child = ipv6_table
95 },
96 { .ctl_name = 0 }
97};
98
99static ctl_table ipv6_root_table[] = {
100 {
101 .ctl_name = CTL_NET,
102 .procname = "net",
103 .mode = 0555,
104 .child = ipv6_net_table
105 },
106 { .ctl_name = 0 }
107}; 55};
56EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
108 57
109void ipv6_sysctl_register(void) 58static int ipv6_sysctl_net_init(struct net *net)
110{ 59{
111 ipv6_sysctl_header = register_sysctl_table(ipv6_root_table); 60 struct ctl_table *ipv6_table;
61 struct ctl_table *ipv6_route_table;
62 struct ctl_table *ipv6_icmp_table;
63 int err;
64
65 err = -ENOMEM;
66 ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
67 GFP_KERNEL);
68 if (!ipv6_table)
69 goto out;
70
71 ipv6_route_table = ipv6_route_sysctl_init(net);
72 if (!ipv6_route_table)
73 goto out_ipv6_table;
74
75 ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
76 if (!ipv6_icmp_table)
77 goto out_ipv6_route_table;
78
79 ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay;
80 /* ipv6_route_table[1].data will be handled when we have
81 routes per namespace */
82 ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
83 ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
84 ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
85 ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
86 ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
87 ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
88 ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
89 ipv6_table[0].child = ipv6_route_table;
90
91 ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time;
92 ipv6_table[1].child = ipv6_icmp_table;
93
94 ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
95
96 /* We don't want this value to be per namespace, it should be global
97 to all namespaces, so make it read-only when we are not in the
98 init network namespace */
99 if (net != &init_net)
100 ipv6_table[3].mode = 0444;
101
102 net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
103 ipv6_table);
104 if (!net->ipv6.sysctl.table)
105 return -ENOMEM;
106
107 if (!net->ipv6.sysctl.table)
108 goto out_ipv6_icmp_table;
109
110 err = 0;
111out:
112 return err;
113
114out_ipv6_icmp_table:
115 kfree(ipv6_icmp_table);
116out_ipv6_route_table:
117 kfree(ipv6_route_table);
118out_ipv6_table:
119 kfree(ipv6_table);
120 goto out;
112} 121}
113 122
114void ipv6_sysctl_unregister(void) 123static void ipv6_sysctl_net_exit(struct net *net)
115{ 124{
116 unregister_sysctl_table(ipv6_sysctl_header); 125 struct ctl_table *ipv6_table;
117} 126 struct ctl_table *ipv6_route_table;
127 struct ctl_table *ipv6_icmp_table;
118 128
119#endif /* CONFIG_SYSCTL */ 129 ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
130 ipv6_route_table = ipv6_table[0].child;
131 ipv6_icmp_table = ipv6_table[1].child;
120 132
133 unregister_net_sysctl_table(net->ipv6.sysctl.table);
121 134
135 kfree(ipv6_table);
136 kfree(ipv6_route_table);
137 kfree(ipv6_icmp_table);
138}
139
140static struct pernet_operations ipv6_sysctl_net_ops = {
141 .init = ipv6_sysctl_net_init,
142 .exit = ipv6_sysctl_net_exit,
143};
122 144
145int ipv6_sysctl_register(void)
146{
147 return register_pernet_subsys(&ipv6_sysctl_net_ops);
148}
149
150void ipv6_sysctl_unregister(void)
151{
152 unregister_pernet_subsys(&ipv6_sysctl_net_ops);
153}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 85208026278b..59d0029e93a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
265 if (final_p) 265 if (final_p)
266 ipv6_addr_copy(&fl.fl6_dst, final_p); 266 ipv6_addr_copy(&fl.fl6_dst, final_p);
267 267
268 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 268 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
269 if (err == -EREMOTE) 269 if (err == -EREMOTE)
270 err = ip6_dst_blackhole(sk, &dst, &fl); 270 err = ip6_dst_blackhole(sk, &dst, &fl);
271 if (err < 0) 271 if (err < 0)
@@ -330,8 +330,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
330 struct tcp_sock *tp; 330 struct tcp_sock *tp;
331 __u32 seq; 331 __u32 seq;
332 332
333 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, 333 sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr,
334 th->source, skb->dev->ifindex); 334 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
335 335
336 if (sk == NULL) { 336 if (sk == NULL) {
337 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); 337 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -561,16 +561,16 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
561 char *newkey, u8 newkeylen) 561 char *newkey, u8 newkeylen)
562{ 562{
563 /* Add key to the list */ 563 /* Add key to the list */
564 struct tcp6_md5sig_key *key; 564 struct tcp_md5sig_key *key;
565 struct tcp_sock *tp = tcp_sk(sk); 565 struct tcp_sock *tp = tcp_sk(sk);
566 struct tcp6_md5sig_key *keys; 566 struct tcp6_md5sig_key *keys;
567 567
568 key = (struct tcp6_md5sig_key*) tcp_v6_md5_do_lookup(sk, peer); 568 key = tcp_v6_md5_do_lookup(sk, peer);
569 if (key) { 569 if (key) {
570 /* modify existing entry - just update that one */ 570 /* modify existing entry - just update that one */
571 kfree(key->base.key); 571 kfree(key->key);
572 key->base.key = newkey; 572 key->key = newkey;
573 key->base.keylen = newkeylen; 573 key->keylen = newkeylen;
574 } else { 574 } else {
575 /* reallocate new list if current one is full. */ 575 /* reallocate new list if current one is full. */
576 if (!tp->md5sig_info) { 576 if (!tp->md5sig_info) {
@@ -581,7 +581,10 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
581 } 581 }
582 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 582 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
583 } 583 }
584 tcp_alloc_md5sig_pool(); 584 if (tcp_alloc_md5sig_pool() == NULL) {
585 kfree(newkey);
586 return -ENOMEM;
587 }
585 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { 588 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
586 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) * 589 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
587 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); 590 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
@@ -634,10 +637,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
634 kfree(tp->md5sig_info->keys6); 637 kfree(tp->md5sig_info->keys6);
635 tp->md5sig_info->keys6 = NULL; 638 tp->md5sig_info->keys6 = NULL;
636 tp->md5sig_info->alloced6 = 0; 639 tp->md5sig_info->alloced6 = 0;
637
638 tcp_free_md5sig_pool();
639
640 return 0;
641 } else { 640 } else {
642 /* shrink the database */ 641 /* shrink the database */
643 if (tp->md5sig_info->entries6 != i) 642 if (tp->md5sig_info->entries6 != i)
@@ -646,6 +645,8 @@ static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
646 (tp->md5sig_info->entries6 - i) 645 (tp->md5sig_info->entries6 - i)
647 * sizeof (tp->md5sig_info->keys6[0])); 646 * sizeof (tp->md5sig_info->keys6[0]));
648 } 647 }
648 tcp_free_md5sig_pool();
649 return 0;
649 } 650 }
650 } 651 }
651 return -ENOENT; 652 return -ENOENT;
@@ -732,7 +733,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
732 struct in6_addr *saddr, 733 struct in6_addr *saddr,
733 struct in6_addr *daddr, 734 struct in6_addr *daddr,
734 struct tcphdr *th, int protocol, 735 struct tcphdr *th, int protocol,
735 int tcplen) 736 unsigned int tcplen)
736{ 737{
737 struct scatterlist sg[4]; 738 struct scatterlist sg[4];
738 __u16 data_len; 739 __u16 data_len;
@@ -781,7 +782,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
781 sg_set_buf(&sg[block++], key->key, key->keylen); 782 sg_set_buf(&sg[block++], key->key, key->keylen);
782 nbytes += key->keylen; 783 nbytes += key->keylen;
783 784
784 sg_mark_end(sg, block); 785 sg_mark_end(&sg[block - 1]);
785 786
786 /* Now store the hash into the packet */ 787 /* Now store the hash into the packet */
787 err = crypto_hash_init(desc); 788 err = crypto_hash_init(desc);
@@ -817,7 +818,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
817 struct dst_entry *dst, 818 struct dst_entry *dst,
818 struct request_sock *req, 819 struct request_sock *req,
819 struct tcphdr *th, int protocol, 820 struct tcphdr *th, int protocol,
820 int tcplen) 821 unsigned int tcplen)
821{ 822{
822 struct in6_addr *saddr, *daddr; 823 struct in6_addr *saddr, *daddr;
823 824
@@ -984,7 +985,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
984 struct tcphdr *th = tcp_hdr(skb), *t1; 985 struct tcphdr *th = tcp_hdr(skb), *t1;
985 struct sk_buff *buff; 986 struct sk_buff *buff;
986 struct flowi fl; 987 struct flowi fl;
987 int tot_len = sizeof(*th); 988 unsigned int tot_len = sizeof(*th);
988#ifdef CONFIG_TCP_MD5SIG 989#ifdef CONFIG_TCP_MD5SIG
989 struct tcp_md5sig_key *key; 990 struct tcp_md5sig_key *key;
990#endif 991#endif
@@ -1084,7 +1085,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1084 struct tcphdr *th = tcp_hdr(skb), *t1; 1085 struct tcphdr *th = tcp_hdr(skb), *t1;
1085 struct sk_buff *buff; 1086 struct sk_buff *buff;
1086 struct flowi fl; 1087 struct flowi fl;
1087 int tot_len = sizeof(struct tcphdr); 1088 unsigned int tot_len = sizeof(struct tcphdr);
1088 __be32 *topt; 1089 __be32 *topt;
1089#ifdef CONFIG_TCP_MD5SIG 1090#ifdef CONFIG_TCP_MD5SIG
1090 struct tcp_md5sig_key *key; 1091 struct tcp_md5sig_key *key;
@@ -1207,9 +1208,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1207 if (req) 1208 if (req)
1208 return tcp_check_req(sk, skb, req, prev); 1209 return tcp_check_req(sk, skb, req, prev);
1209 1210
1210 nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, 1211 nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo,
1211 th->source, &ipv6_hdr(skb)->daddr, 1212 &ipv6_hdr(skb)->saddr, th->source,
1212 ntohs(th->dest), inet6_iif(skb)); 1213 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1213 1214
1214 if (nsk) { 1215 if (nsk) {
1215 if (nsk->sk_state != TCP_TIME_WAIT) { 1216 if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1709,9 +1710,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1709 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); 1710 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1710 TCP_SKB_CB(skb)->sacked = 0; 1711 TCP_SKB_CB(skb)->sacked = 0;
1711 1712
1712 sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, 1713 sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo,
1713 &ipv6_hdr(skb)->daddr, ntohs(th->dest), 1714 &ipv6_hdr(skb)->saddr, th->source,
1714 inet6_iif(skb)); 1715 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
1716 inet6_iif(skb));
1715 1717
1716 if (!sk) 1718 if (!sk)
1717 goto no_tcp_socket; 1719 goto no_tcp_socket;
@@ -1791,7 +1793,7 @@ do_time_wait:
1791 { 1793 {
1792 struct sock *sk2; 1794 struct sock *sk2;
1793 1795
1794 sk2 = inet6_lookup_listener(&tcp_hashinfo, 1796 sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo,
1795 &ipv6_hdr(skb)->daddr, 1797 &ipv6_hdr(skb)->daddr,
1796 ntohs(th->dest), inet6_iif(skb)); 1798 ntohs(th->dest), inet6_iif(skb));
1797 if (sk2 != NULL) { 1799 if (sk2 != NULL) {
@@ -2107,6 +2109,8 @@ void tcp6_proc_exit(void)
2107} 2109}
2108#endif 2110#endif
2109 2111
2112DEFINE_PROTO_INUSE(tcpv6)
2113
2110struct proto tcpv6_prot = { 2114struct proto tcpv6_prot = {
2111 .name = "TCPv6", 2115 .name = "TCPv6",
2112 .owner = THIS_MODULE, 2116 .owner = THIS_MODULE,
@@ -2141,6 +2145,7 @@ struct proto tcpv6_prot = {
2141 .compat_setsockopt = compat_tcp_setsockopt, 2145 .compat_setsockopt = compat_tcp_setsockopt,
2142 .compat_getsockopt = compat_tcp_getsockopt, 2146 .compat_getsockopt = compat_tcp_getsockopt,
2143#endif 2147#endif
2148 REF_PROTO_INUSE(tcpv6)
2144}; 2149};
2145 2150
2146static struct inet6_protocol tcpv6_protocol = { 2151static struct inet6_protocol tcpv6_protocol = {
@@ -2162,14 +2167,36 @@ static struct inet_protosw tcpv6_protosw = {
2162 INET_PROTOSW_ICSK, 2167 INET_PROTOSW_ICSK,
2163}; 2168};
2164 2169
2165void __init tcpv6_init(void) 2170int __init tcpv6_init(void)
2166{ 2171{
2172 int ret;
2173
2174 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2175 if (ret)
2176 goto out;
2177
2167 /* register inet6 protocol */ 2178 /* register inet6 protocol */
2168 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) 2179 ret = inet6_register_protosw(&tcpv6_protosw);
2169 printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); 2180 if (ret)
2170 inet6_register_protosw(&tcpv6_protosw); 2181 goto out_tcpv6_protocol;
2182
2183 ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6,
2184 SOCK_RAW, IPPROTO_TCP);
2185 if (ret)
2186 goto out_tcpv6_protosw;
2187out:
2188 return ret;
2171 2189
2172 if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW, 2190out_tcpv6_protocol:
2173 IPPROTO_TCP) < 0) 2191 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2174 panic("Failed to create the TCPv6 control socket.\n"); 2192out_tcpv6_protosw:
2193 inet6_unregister_protosw(&tcpv6_protosw);
2194 goto out;
2195}
2196
2197void tcpv6_exit(void)
2198{
2199 sock_release(tcp6_socket);
2200 inet6_unregister_protosw(&tcpv6_protosw);
2201 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2175} 2202}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index caebad6ee510..53739de829db 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
34#include <linux/ipv6.h> 34#include <linux/ipv6.h>
35#include <linux/icmpv6.h> 35#include <linux/icmpv6.h>
36#include <linux/init.h> 36#include <linux/init.h>
37#include <linux/module.h>
37#include <linux/skbuff.h> 38#include <linux/skbuff.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -50,14 +51,13 @@
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
51#include "udp_impl.h" 52#include "udp_impl.h"
52 53
53DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
54
55static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) 54static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
56{ 55{
57 return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); 56 return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
58} 57}
59 58
60static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport, 59static struct sock *__udp6_lib_lookup(struct net *net,
60 struct in6_addr *saddr, __be16 sport,
61 struct in6_addr *daddr, __be16 dport, 61 struct in6_addr *daddr, __be16 dport,
62 int dif, struct hlist_head udptable[]) 62 int dif, struct hlist_head udptable[])
63{ 63{
@@ -70,7 +70,8 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
70 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 70 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
71 struct inet_sock *inet = inet_sk(sk); 71 struct inet_sock *inet = inet_sk(sk);
72 72
73 if (sk->sk_hash == hnum && sk->sk_family == PF_INET6) { 73 if (sk->sk_net == net && sk->sk_hash == hnum &&
74 sk->sk_family == PF_INET6) {
74 struct ipv6_pinfo *np = inet6_sk(sk); 75 struct ipv6_pinfo *np = inet6_sk(sk);
75 int score = 0; 76 int score = 0;
76 if (inet->dport) { 77 if (inet->dport) {
@@ -121,6 +122,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
121 struct inet_sock *inet = inet_sk(sk); 122 struct inet_sock *inet = inet_sk(sk);
122 struct sk_buff *skb; 123 struct sk_buff *skb;
123 unsigned int ulen, copied; 124 unsigned int ulen, copied;
125 int peeked;
124 int err; 126 int err;
125 int is_udplite = IS_UDPLITE(sk); 127 int is_udplite = IS_UDPLITE(sk);
126 128
@@ -131,7 +133,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
131 return ipv6_recv_error(sk, msg, len); 133 return ipv6_recv_error(sk, msg, len);
132 134
133try_again: 135try_again:
134 skb = skb_recv_datagram(sk, flags, noblock, &err); 136 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
137 &peeked, &err);
135 if (!skb) 138 if (!skb)
136 goto out; 139 goto out;
137 140
@@ -164,6 +167,9 @@ try_again:
164 if (err) 167 if (err)
165 goto out_free; 168 goto out_free;
166 169
170 if (!peeked)
171 UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
172
167 sock_recv_timestamp(msg, sk, skb); 173 sock_recv_timestamp(msg, sk, skb);
168 174
169 /* Copy the address. */ 175 /* Copy the address. */
@@ -200,17 +206,20 @@ try_again:
200 err = ulen; 206 err = ulen;
201 207
202out_free: 208out_free:
209 lock_sock(sk);
203 skb_free_datagram(sk, skb); 210 skb_free_datagram(sk, skb);
211 release_sock(sk);
204out: 212out:
205 return err; 213 return err;
206 214
207csum_copy_err: 215csum_copy_err:
208 skb_kill_datagram(sk, skb, flags); 216 lock_sock(sk);
209 217 if (!skb_kill_datagram(sk, skb, flags))
210 if (flags & MSG_DONTWAIT) {
211 UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); 218 UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
219 release_sock(sk);
220
221 if (flags & MSG_DONTWAIT)
212 return -EAGAIN; 222 return -EAGAIN;
213 }
214 goto try_again; 223 goto try_again;
215} 224}
216 225
@@ -226,7 +235,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
226 struct sock *sk; 235 struct sock *sk;
227 int err; 236 int err;
228 237
229 sk = __udp6_lib_lookup(daddr, uh->dest, 238 sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest,
230 saddr, uh->source, inet6_iif(skb), udptable); 239 saddr, uh->source, inet6_iif(skb), udptable);
231 if (sk == NULL) 240 if (sk == NULL)
232 return; 241 return;
@@ -252,13 +261,14 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
252 struct inet6_skb_parm *opt, int type, 261 struct inet6_skb_parm *opt, int type,
253 int code, int offset, __be32 info ) 262 int code, int offset, __be32 info )
254{ 263{
255 return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); 264 __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
256} 265}
257 266
258int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 267int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
259{ 268{
260 struct udp_sock *up = udp_sk(sk); 269 struct udp_sock *up = udp_sk(sk);
261 int rc; 270 int rc;
271 int is_udplite = IS_UDPLITE(sk);
262 272
263 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 273 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
264 goto drop; 274 goto drop;
@@ -266,7 +276,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
266 /* 276 /*
267 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 277 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
268 */ 278 */
269 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 279 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
270 280
271 if (up->pcrlen == 0) { /* full coverage was set */ 281 if (up->pcrlen == 0) { /* full coverage was set */
272 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" 282 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
@@ -290,13 +300,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
290 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 300 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
291 /* Note that an ENOMEM error is charged twice */ 301 /* Note that an ENOMEM error is charged twice */
292 if (rc == -ENOMEM) 302 if (rc == -ENOMEM)
293 UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); 303 UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
294 goto drop; 304 goto drop;
295 } 305 }
296 UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); 306
297 return 0; 307 return 0;
298drop: 308drop:
299 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); 309 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
300 kfree_skb(skb); 310 kfree_skb(skb);
301 return -1; 311 return -1;
302} 312}
@@ -362,10 +372,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
362 while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, 372 while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
363 uh->source, saddr, dif))) { 373 uh->source, saddr, dif))) {
364 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); 374 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
365 if (buff) 375 if (buff) {
366 udpv6_queue_rcv_skb(sk2, buff); 376 bh_lock_sock_nested(sk2);
377 if (!sock_owned_by_user(sk2))
378 udpv6_queue_rcv_skb(sk2, buff);
379 else
380 sk_add_backlog(sk2, buff);
381 bh_unlock_sock(sk2);
382 }
367 } 383 }
368 udpv6_queue_rcv_skb(sk, skb); 384 bh_lock_sock_nested(sk);
385 if (!sock_owned_by_user(sk))
386 udpv6_queue_rcv_skb(sk, skb);
387 else
388 sk_add_backlog(sk, skb);
389 bh_unlock_sock(sk);
369out: 390out:
370 read_unlock(&udp_hash_lock); 391 read_unlock(&udp_hash_lock);
371 return 0; 392 return 0;
@@ -459,7 +480,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
459 * check socket cache ... must talk to Alan about his plans 480 * check socket cache ... must talk to Alan about his plans
460 * for sock caches... i'll skip this for now. 481 * for sock caches... i'll skip this for now.
461 */ 482 */
462 sk = __udp6_lib_lookup(saddr, uh->source, 483 sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source,
463 daddr, uh->dest, inet6_iif(skb), udptable); 484 daddr, uh->dest, inet6_iif(skb), udptable);
464 485
465 if (sk == NULL) { 486 if (sk == NULL) {
@@ -478,7 +499,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
478 499
479 /* deliver */ 500 /* deliver */
480 501
481 udpv6_queue_rcv_skb(sk, skb); 502 bh_lock_sock_nested(sk);
503 if (!sock_owned_by_user(sk))
504 udpv6_queue_rcv_skb(sk, skb);
505 else
506 sk_add_backlog(sk, skb);
507 bh_unlock_sock(sk);
482 sock_put(sk); 508 sock_put(sk);
483 return 0; 509 return 0;
484 510
@@ -524,6 +550,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
524 struct inet_sock *inet = inet_sk(sk); 550 struct inet_sock *inet = inet_sk(sk);
525 struct flowi *fl = &inet->cork.fl; 551 struct flowi *fl = &inet->cork.fl;
526 int err = 0; 552 int err = 0;
553 int is_udplite = IS_UDPLITE(sk);
527 __wsum csum = 0; 554 __wsum csum = 0;
528 555
529 /* Grab the skbuff where UDP header space exists. */ 556 /* Grab the skbuff where UDP header space exists. */
@@ -539,7 +566,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
539 uh->len = htons(up->len); 566 uh->len = htons(up->len);
540 uh->check = 0; 567 uh->check = 0;
541 568
542 if (up->pcflag) 569 if (is_udplite)
543 csum = udplite_csum_outgoing(sk, skb); 570 csum = udplite_csum_outgoing(sk, skb);
544 else 571 else
545 csum = udp_csum_outgoing(sk, skb); 572 csum = udp_csum_outgoing(sk, skb);
@@ -555,7 +582,7 @@ out:
555 up->len = 0; 582 up->len = 0;
556 up->pending = 0; 583 up->pending = 0;
557 if (!err) 584 if (!err)
558 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); 585 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
559 return err; 586 return err;
560} 587}
561 588
@@ -579,7 +606,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
579 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 606 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
580 int err; 607 int err;
581 int connected = 0; 608 int connected = 0;
582 int is_udplite = up->pcflag; 609 int is_udplite = IS_UDPLITE(sk);
583 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 610 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
584 611
585 /* destination address check */ 612 /* destination address check */
@@ -749,7 +776,7 @@ do_udp_sendmsg:
749 if (final_p) 776 if (final_p)
750 ipv6_addr_copy(&fl.fl6_dst, final_p); 777 ipv6_addr_copy(&fl.fl6_dst, final_p);
751 778
752 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 779 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
753 if (err == -EREMOTE) 780 if (err == -EREMOTE)
754 err = ip6_dst_blackhole(sk, &dst, &fl); 781 err = ip6_dst_blackhole(sk, &dst, &fl);
755 if (err < 0) 782 if (err < 0)
@@ -971,6 +998,8 @@ void udp6_proc_exit(void) {
971 998
972/* ------------------------------------------------------------------------ */ 999/* ------------------------------------------------------------------------ */
973 1000
1001DEFINE_PROTO_INUSE(udpv6)
1002
974struct proto udpv6_prot = { 1003struct proto udpv6_prot = {
975 .name = "UDPv6", 1004 .name = "UDPv6",
976 .owner = THIS_MODULE, 1005 .owner = THIS_MODULE,
@@ -987,11 +1016,16 @@ struct proto udpv6_prot = {
987 .hash = udp_lib_hash, 1016 .hash = udp_lib_hash,
988 .unhash = udp_lib_unhash, 1017 .unhash = udp_lib_unhash,
989 .get_port = udp_v6_get_port, 1018 .get_port = udp_v6_get_port,
1019 .memory_allocated = &udp_memory_allocated,
1020 .sysctl_mem = sysctl_udp_mem,
1021 .sysctl_wmem = &sysctl_udp_wmem_min,
1022 .sysctl_rmem = &sysctl_udp_rmem_min,
990 .obj_size = sizeof(struct udp6_sock), 1023 .obj_size = sizeof(struct udp6_sock),
991#ifdef CONFIG_COMPAT 1024#ifdef CONFIG_COMPAT
992 .compat_setsockopt = compat_udpv6_setsockopt, 1025 .compat_setsockopt = compat_udpv6_setsockopt,
993 .compat_getsockopt = compat_udpv6_getsockopt, 1026 .compat_getsockopt = compat_udpv6_getsockopt,
994#endif 1027#endif
1028 REF_PROTO_INUSE(udpv6)
995}; 1029};
996 1030
997static struct inet_protosw udpv6_protosw = { 1031static struct inet_protosw udpv6_protosw = {
@@ -1005,9 +1039,27 @@ static struct inet_protosw udpv6_protosw = {
1005}; 1039};
1006 1040
1007 1041
1008void __init udpv6_init(void) 1042int __init udpv6_init(void)
1043{
1044 int ret;
1045
1046 ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
1047 if (ret)
1048 goto out;
1049
1050 ret = inet6_register_protosw(&udpv6_protosw);
1051 if (ret)
1052 goto out_udpv6_protocol;
1053out:
1054 return ret;
1055
1056out_udpv6_protocol:
1057 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1058 goto out;
1059}
1060
1061void udpv6_exit(void)
1009{ 1062{
1010 if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0) 1063 inet6_unregister_protosw(&udpv6_protosw);
1011 printk(KERN_ERR "udpv6_init: Could not register protocol\n"); 1064 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1012 inet6_register_protosw(&udpv6_protosw);
1013} 1065}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 2d3fda601232..21be3a83e7bc 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -5,6 +5,7 @@
5#include <net/protocol.h> 5#include <net/protocol.h>
6#include <net/addrconf.h> 6#include <net/addrconf.h>
7#include <net/inet_common.h> 7#include <net/inet_common.h>
8#include <net/transp_v6.h>
8 9
9extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); 10extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
10extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, 11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 766566f7de47..87d4202522ee 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb,
26 struct inet6_skb_parm *opt, 26 struct inet6_skb_parm *opt,
27 int type, int code, int offset, __be32 info) 27 int type, int code, int offset, __be32 info)
28{ 28{
29 return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); 29 __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
30} 30}
31 31
32static struct inet6_protocol udplitev6_protocol = { 32static struct inet6_protocol udplitev6_protocol = {
@@ -40,6 +40,8 @@ static int udplite_v6_get_port(struct sock *sk, unsigned short snum)
40 return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); 40 return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal);
41} 41}
42 42
43DEFINE_PROTO_INUSE(udplitev6)
44
43struct proto udplitev6_prot = { 45struct proto udplitev6_prot = {
44 .name = "UDPLITEv6", 46 .name = "UDPLITEv6",
45 .owner = THIS_MODULE, 47 .owner = THIS_MODULE,
@@ -62,6 +64,7 @@ struct proto udplitev6_prot = {
62 .compat_setsockopt = compat_udpv6_setsockopt, 64 .compat_setsockopt = compat_udpv6_setsockopt,
63 .compat_getsockopt = compat_udpv6_getsockopt, 65 .compat_getsockopt = compat_udpv6_getsockopt,
64#endif 66#endif
67 REF_PROTO_INUSE(udplitev6)
65}; 68};
66 69
67static struct inet_protosw udplite6_protosw = { 70static struct inet_protosw udplite6_protosw = {
@@ -74,12 +77,29 @@ static struct inet_protosw udplite6_protosw = {
74 .flags = INET_PROTOSW_PERMANENT, 77 .flags = INET_PROTOSW_PERMANENT,
75}; 78};
76 79
77void __init udplitev6_init(void) 80int __init udplitev6_init(void)
78{ 81{
79 if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) 82 int ret;
80 printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__); 83
84 ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
85 if (ret)
86 goto out;
87
88 ret = inet6_register_protosw(&udplite6_protosw);
89 if (ret)
90 goto out_udplitev6_protocol;
91out:
92 return ret;
81 93
82 inet6_register_protosw(&udplite6_protosw); 94out_udplitev6_protocol:
95 inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
96 goto out;
97}
98
99void udplitev6_exit(void)
100{
101 inet6_unregister_protosw(&udplite6_protosw);
102 inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
83} 103}
84 104
85#ifdef CONFIG_PROC_FS 105#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 515783707e86..a4714d76ae6b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,120 +16,37 @@
16#include <net/ipv6.h> 16#include <net/ipv6.h>
17#include <net/xfrm.h> 17#include <net/xfrm.h>
18 18
19int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) 19int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
20{ 20{
21 int err; 21 return xfrm6_extract_header(skb);
22 __be32 seq; 22}
23 struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
24 struct xfrm_state *x;
25 int xfrm_nr = 0;
26 int decaps = 0;
27 unsigned int nhoff;
28
29 nhoff = IP6CB(skb)->nhoff;
30
31 seq = 0;
32 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
33 goto drop;
34
35 do {
36 struct ipv6hdr *iph = ipv6_hdr(skb);
37
38 if (xfrm_nr == XFRM_MAX_DEPTH)
39 goto drop;
40
41 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
42 nexthdr, AF_INET6);
43 if (x == NULL)
44 goto drop;
45 spin_lock(&x->lock);
46 if (unlikely(x->km.state != XFRM_STATE_VALID))
47 goto drop_unlock;
48
49 if (x->props.replay_window && xfrm_replay_check(x, seq))
50 goto drop_unlock;
51
52 if (xfrm_state_check_expire(x))
53 goto drop_unlock;
54
55 nexthdr = x->type->input(x, skb);
56 if (nexthdr <= 0)
57 goto drop_unlock;
58
59 skb_network_header(skb)[nhoff] = nexthdr;
60
61 if (x->props.replay_window)
62 xfrm_replay_advance(x, seq);
63
64 x->curlft.bytes += skb->len;
65 x->curlft.packets++;
66
67 spin_unlock(&x->lock);
68
69 xfrm_vec[xfrm_nr++] = x;
70
71 if (x->outer_mode->input(x, skb))
72 goto drop;
73
74 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
75 decaps = 1;
76 break;
77 }
78
79 if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
80 goto drop;
81 } while (!err);
82 23
83 /* Allocate new secpath or COW existing one. */ 24int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
84 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { 25{
85 struct sec_path *sp; 26 XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
86 sp = secpath_dup(skb->sp); 27 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
87 if (!sp) 28 return xfrm_input(skb, nexthdr, spi, 0);
88 goto drop; 29}
89 if (skb->sp) 30EXPORT_SYMBOL(xfrm6_rcv_spi);
90 secpath_put(skb->sp);
91 skb->sp = sp;
92 }
93 31
94 if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH) 32int xfrm6_transport_finish(struct sk_buff *skb, int async)
95 goto drop; 33{
34 skb_network_header(skb)[IP6CB(skb)->nhoff] =
35 XFRM_MODE_SKB_CB(skb)->protocol;
96 36
97 memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, 37#ifndef CONFIG_NETFILTER
98 xfrm_nr * sizeof(xfrm_vec[0])); 38 if (!async)
99 skb->sp->len += xfrm_nr;
100
101 nf_reset(skb);
102
103 if (decaps) {
104 dst_release(skb->dst);
105 skb->dst = NULL;
106 netif_rx(skb);
107 return -1;
108 } else {
109#ifdef CONFIG_NETFILTER
110 ipv6_hdr(skb)->payload_len = htons(skb->len);
111 __skb_push(skb, skb->data - skb_network_header(skb));
112
113 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
114 ip6_rcv_finish);
115 return -1;
116#else
117 return 1; 39 return 1;
118#endif 40#endif
119 }
120 41
121drop_unlock: 42 ipv6_hdr(skb)->payload_len = htons(skb->len);
122 spin_unlock(&x->lock); 43 __skb_push(skb, skb->data - skb_network_header(skb));
123 xfrm_state_put(x); 44
124drop: 45 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
125 while (--xfrm_nr >= 0) 46 ip6_rcv_finish);
126 xfrm_state_put(xfrm_vec[xfrm_nr]);
127 kfree_skb(skb);
128 return -1; 47 return -1;
129} 48}
130 49
131EXPORT_SYMBOL(xfrm6_rcv_spi);
132
133int xfrm6_rcv(struct sk_buff *skb) 50int xfrm6_rcv(struct sk_buff *skb)
134{ 51{
135 return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 52 return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
@@ -144,10 +61,28 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
144 struct xfrm_state *x = NULL; 61 struct xfrm_state *x = NULL;
145 int wildcard = 0; 62 int wildcard = 0;
146 xfrm_address_t *xany; 63 xfrm_address_t *xany;
147 struct xfrm_state *xfrm_vec_one = NULL;
148 int nh = 0; 64 int nh = 0;
149 int i = 0; 65 int i = 0;
150 66
67 /* Allocate new secpath or COW existing one. */
68 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
69 struct sec_path *sp;
70
71 sp = secpath_dup(skb->sp);
72 if (!sp) {
73 XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
74 goto drop;
75 }
76 if (skb->sp)
77 secpath_put(skb->sp);
78 skb->sp = sp;
79 }
80
81 if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
82 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
83 goto drop;
84 }
85
151 xany = (xfrm_address_t *)&in6addr_any; 86 xany = (xfrm_address_t *)&in6addr_any;
152 87
153 for (i = 0; i < 3; i++) { 88 for (i = 0; i < 3; i++) {
@@ -200,47 +135,37 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
200 continue; 135 continue;
201 } 136 }
202 137
138 spin_unlock(&x->lock);
139
203 nh = x->type->input(x, skb); 140 nh = x->type->input(x, skb);
204 if (nh <= 0) { 141 if (nh <= 0) {
205 spin_unlock(&x->lock);
206 xfrm_state_put(x); 142 xfrm_state_put(x);
207 x = NULL; 143 x = NULL;
208 continue; 144 continue;
209 } 145 }
210 146
211 x->curlft.bytes += skb->len; 147 /* Found a state */
212 x->curlft.packets++;
213
214 spin_unlock(&x->lock);
215
216 xfrm_vec_one = x;
217 break; 148 break;
218 } 149 }
219 150
220 if (!xfrm_vec_one) 151 if (!x) {
152 XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
153 xfrm_audit_state_notfound_simple(skb, AF_INET6);
221 goto drop; 154 goto drop;
222
223 /* Allocate new secpath or COW existing one. */
224 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
225 struct sec_path *sp;
226 sp = secpath_dup(skb->sp);
227 if (!sp)
228 goto drop;
229 if (skb->sp)
230 secpath_put(skb->sp);
231 skb->sp = sp;
232 } 155 }
233 156
234 if (1 + skb->sp->len > XFRM_MAX_DEPTH) 157 skb->sp->xvec[skb->sp->len++] = x;
235 goto drop; 158
159 spin_lock(&x->lock);
236 160
237 skb->sp->xvec[skb->sp->len] = xfrm_vec_one; 161 x->curlft.bytes += skb->len;
238 skb->sp->len ++; 162 x->curlft.packets++;
163
164 spin_unlock(&x->lock);
239 165
240 return 1; 166 return 1;
167
241drop: 168drop:
242 if (xfrm_vec_one)
243 xfrm_state_put(xfrm_vec_one);
244 return -1; 169 return -1;
245} 170}
246 171
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 2bfb4f05c14c..0527d11c1ae3 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -19,31 +19,39 @@
19#include <net/ipv6.h> 19#include <net/ipv6.h>
20#include <net/xfrm.h> 20#include <net/xfrm.h>
21 21
22static void xfrm6_beet_make_header(struct sk_buff *skb)
23{
24 struct ipv6hdr *iph = ipv6_hdr(skb);
25
26 iph->version = 6;
27
28 memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
29 sizeof(iph->flow_lbl));
30 iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
31
32 ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
33 iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
34}
35
22/* Add encapsulation header. 36/* Add encapsulation header.
23 * 37 *
24 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. 38 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
25 */ 39 */
26static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) 40static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
27{ 41{
28 struct ipv6hdr *iph, *top_iph; 42 struct ipv6hdr *top_iph;
29 u8 *prevhdr;
30 int hdr_len;
31 43
32 iph = ipv6_hdr(skb);
33
34 hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
35
36 skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
37 skb_set_network_header(skb, -x->props.header_len); 44 skb_set_network_header(skb, -x->props.header_len);
38 skb->transport_header = skb->network_header + hdr_len; 45 skb->mac_header = skb->network_header +
39 __skb_pull(skb, hdr_len); 46 offsetof(struct ipv6hdr, nexthdr);
47 skb->transport_header = skb->network_header + sizeof(*top_iph);
48
49 xfrm6_beet_make_header(skb);
40 50
41 top_iph = ipv6_hdr(skb); 51 top_iph = ipv6_hdr(skb);
42 memmove(top_iph, iph, hdr_len);
43 52
44 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 53 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
45 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 54 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
46
47 return 0; 55 return 0;
48} 56}
49 57
@@ -52,19 +60,21 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
52 struct ipv6hdr *ip6h; 60 struct ipv6hdr *ip6h;
53 const unsigned char *old_mac; 61 const unsigned char *old_mac;
54 int size = sizeof(struct ipv6hdr); 62 int size = sizeof(struct ipv6hdr);
55 int err = -EINVAL; 63 int err;
56 64
57 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 65 err = skb_cow_head(skb, size + skb->mac_len);
66 if (err)
58 goto out; 67 goto out;
59 68
60 skb_push(skb, size); 69 __skb_push(skb, size);
61 memmove(skb->data, skb_network_header(skb), size);
62 skb_reset_network_header(skb); 70 skb_reset_network_header(skb);
63 71
64 old_mac = skb_mac_header(skb); 72 old_mac = skb_mac_header(skb);
65 skb_set_mac_header(skb, -skb->mac_len); 73 skb_set_mac_header(skb, -skb->mac_len);
66 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 74 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
67 75
76 xfrm6_beet_make_header(skb);
77
68 ip6h = ipv6_hdr(skb); 78 ip6h = ipv6_hdr(skb);
69 ip6h->payload_len = htons(skb->len - size); 79 ip6h->payload_len = htons(skb->len - size);
70 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); 80 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
@@ -75,8 +85,10 @@ out:
75} 85}
76 86
77static struct xfrm_mode xfrm6_beet_mode = { 87static struct xfrm_mode xfrm6_beet_mode = {
78 .input = xfrm6_beet_input, 88 .input2 = xfrm6_beet_input,
79 .output = xfrm6_beet_output, 89 .input = xfrm_prepare_input,
90 .output2 = xfrm6_beet_output,
91 .output = xfrm6_prepare_output,
80 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
81 .encap = XFRM_MODE_BEET, 93 .encap = XFRM_MODE_BEET,
82 .flags = XFRM_MODE_FLAG_TUNNEL, 94 .flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index a7bc8c62317a..63d5d493098a 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -28,6 +28,7 @@
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/skbuff.h> 30#include <linux/skbuff.h>
31#include <linux/spinlock.h>
31#include <linux/stringify.h> 32#include <linux/stringify.h>
32#include <linux/time.h> 33#include <linux/time.h>
33#include <net/ipv6.h> 34#include <net/ipv6.h>
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index fd84e2217274..0c742faaa30b 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -25,46 +25,29 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
25 IP6_ECN_set_ce(inner_iph); 25 IP6_ECN_set_ce(inner_iph);
26} 26}
27 27
28static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
29{
30 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
31 IP_ECN_set_ce(ipip_hdr(skb));
32}
33
34/* Add encapsulation header. 28/* Add encapsulation header.
35 * 29 *
36 * The top IP header will be constructed per RFC 2401. 30 * The top IP header will be constructed per RFC 2401.
37 */ 31 */
38static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 32static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
39{ 33{
40 struct dst_entry *dst = skb->dst; 34 struct dst_entry *dst = skb->dst;
41 struct xfrm_dst *xdst = (struct xfrm_dst*)dst; 35 struct ipv6hdr *top_iph;
42 struct ipv6hdr *iph, *top_iph;
43 int dsfield; 36 int dsfield;
44 37
45 iph = ipv6_hdr(skb);
46
47 skb_set_network_header(skb, -x->props.header_len); 38 skb_set_network_header(skb, -x->props.header_len);
48 skb->mac_header = skb->network_header + 39 skb->mac_header = skb->network_header +
49 offsetof(struct ipv6hdr, nexthdr); 40 offsetof(struct ipv6hdr, nexthdr);
50 skb->transport_header = skb->network_header + sizeof(*iph); 41 skb->transport_header = skb->network_header + sizeof(*top_iph);
51 top_iph = ipv6_hdr(skb); 42 top_iph = ipv6_hdr(skb);
52 43
53 top_iph->version = 6; 44 top_iph->version = 6;
54 if (xdst->route->ops->family == AF_INET6) { 45
55 top_iph->priority = iph->priority; 46 memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
56 top_iph->flow_lbl[0] = iph->flow_lbl[0]; 47 sizeof(top_iph->flow_lbl));
57 top_iph->flow_lbl[1] = iph->flow_lbl[1]; 48 top_iph->nexthdr = x->inner_mode->afinfo->proto;
58 top_iph->flow_lbl[2] = iph->flow_lbl[2]; 49
59 top_iph->nexthdr = IPPROTO_IPV6; 50 dsfield = XFRM_MODE_SKB_CB(skb)->tos;
60 } else {
61 top_iph->priority = 0;
62 top_iph->flow_lbl[0] = 0;
63 top_iph->flow_lbl[1] = 0;
64 top_iph->flow_lbl[2] = 0;
65 top_iph->nexthdr = IPPROTO_IPIP;
66 }
67 dsfield = ipv6_get_dsfield(top_iph);
68 dsfield = INET_ECN_encapsulate(dsfield, dsfield); 51 dsfield = INET_ECN_encapsulate(dsfield, dsfield);
69 if (x->props.flags & XFRM_STATE_NOECN) 52 if (x->props.flags & XFRM_STATE_NOECN)
70 dsfield &= ~INET_ECN_MASK; 53 dsfield &= ~INET_ECN_MASK;
@@ -72,18 +55,15 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
72 top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); 55 top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
73 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 56 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
74 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 57 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
75 skb->protocol = htons(ETH_P_IPV6);
76 return 0; 58 return 0;
77} 59}
78 60
79static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 61static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
80{ 62{
81 int err = -EINVAL; 63 int err = -EINVAL;
82 const unsigned char *old_mac; 64 const unsigned char *old_mac;
83 const unsigned char *nh = skb_network_header(skb);
84 65
85 if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 && 66 if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
86 nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
87 goto out; 67 goto out;
88 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 68 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
89 goto out; 69 goto out;
@@ -92,17 +72,12 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 72 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
93 goto out; 73 goto out;
94 74
95 nh = skb_network_header(skb); 75 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
96 if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { 76 ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
97 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 77 ipipv6_hdr(skb));
98 ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb)); 78 if (!(x->props.flags & XFRM_STATE_NOECN))
99 if (!(x->props.flags & XFRM_STATE_NOECN)) 79 ipip6_ecn_decapsulate(skb);
100 ipip6_ecn_decapsulate(skb); 80
101 } else {
102 if (!(x->props.flags & XFRM_STATE_NOECN))
103 ip6ip_ecn_decapsulate(skb);
104 skb->protocol = htons(ETH_P_IP);
105 }
106 old_mac = skb_mac_header(skb); 81 old_mac = skb_mac_header(skb);
107 skb_set_mac_header(skb, -skb->mac_len); 82 skb_set_mac_header(skb, -skb->mac_len);
108 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 83 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -114,19 +89,21 @@ out:
114} 89}
115 90
116static struct xfrm_mode xfrm6_tunnel_mode = { 91static struct xfrm_mode xfrm6_tunnel_mode = {
117 .input = xfrm6_tunnel_input, 92 .input2 = xfrm6_mode_tunnel_input,
118 .output = xfrm6_tunnel_output, 93 .input = xfrm_prepare_input,
94 .output2 = xfrm6_mode_tunnel_output,
95 .output = xfrm6_prepare_output,
119 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
120 .encap = XFRM_MODE_TUNNEL, 97 .encap = XFRM_MODE_TUNNEL,
121 .flags = XFRM_MODE_FLAG_TUNNEL, 98 .flags = XFRM_MODE_FLAG_TUNNEL,
122}; 99};
123 100
124static int __init xfrm6_tunnel_init(void) 101static int __init xfrm6_mode_tunnel_init(void)
125{ 102{
126 return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); 103 return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
127} 104}
128 105
129static void __exit xfrm6_tunnel_exit(void) 106static void __exit xfrm6_mode_tunnel_exit(void)
130{ 107{
131 int err; 108 int err;
132 109
@@ -134,7 +111,7 @@ static void __exit xfrm6_tunnel_exit(void)
134 BUG_ON(err); 111 BUG_ON(err);
135} 112}
136 113
137module_init(xfrm6_tunnel_init); 114module_init(xfrm6_mode_tunnel_init);
138module_exit(xfrm6_tunnel_exit); 115module_exit(xfrm6_mode_tunnel_exit);
139MODULE_LICENSE("GPL"); 116MODULE_LICENSE("GPL");
140MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); 117MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 656976760ad4..b34c58c65656 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -10,10 +10,12 @@
10 */ 10 */
11 11
12#include <linux/if_ether.h> 12#include <linux/if_ether.h>
13#include <linux/compiler.h> 13#include <linux/kernel.h>
14#include <linux/module.h>
14#include <linux/skbuff.h> 15#include <linux/skbuff.h>
15#include <linux/icmpv6.h> 16#include <linux/icmpv6.h>
16#include <linux/netfilter_ipv6.h> 17#include <linux/netfilter_ipv6.h>
18#include <net/dst.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
18#include <net/xfrm.h> 20#include <net/xfrm.h>
19 21
@@ -43,97 +45,50 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
43 return ret; 45 return ret;
44} 46}
45 47
46static inline int xfrm6_output_one(struct sk_buff *skb) 48int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
47{ 49{
48 struct dst_entry *dst = skb->dst;
49 struct xfrm_state *x = dst->xfrm;
50 struct ipv6hdr *iph;
51 int err; 50 int err;
52 51
53 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { 52 err = xfrm6_tunnel_check_size(skb);
54 err = xfrm6_tunnel_check_size(skb);
55 if (err)
56 goto error_nolock;
57 }
58
59 err = xfrm_output(skb);
60 if (err) 53 if (err)
61 goto error_nolock; 54 return err;
62 55
63 iph = ipv6_hdr(skb); 56 XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
64 iph->payload_len = htons(skb->len - sizeof(*iph));
65 57
66 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; 58 return xfrm6_extract_header(skb);
67 err = 0;
68
69out_exit:
70 return err;
71error_nolock:
72 kfree_skb(skb);
73 goto out_exit;
74} 59}
75 60
76static int xfrm6_output_finish2(struct sk_buff *skb) 61int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
77{ 62{
78 int err; 63 int err;
79 64
80 while (likely((err = xfrm6_output_one(skb)) == 0)) { 65 err = x->inner_mode->afinfo->extract_output(x, skb);
81 nf_reset(skb); 66 if (err)
82 67 return err;
83 err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
84 skb->dst->dev, dst_output);
85 if (unlikely(err != 1))
86 break;
87 68
88 if (!skb->dst->xfrm) 69 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
89 return dst_output(skb); 70#ifdef CONFIG_NETFILTER
71 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
72#endif
90 73
91 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, 74 skb->protocol = htons(ETH_P_IPV6);
92 skb->dst->dev, xfrm6_output_finish2);
93 if (unlikely(err != 1))
94 break;
95 }
96 75
97 return err; 76 return x->outer_mode->output2(x, skb);
98} 77}
78EXPORT_SYMBOL(xfrm6_prepare_output);
99 79
100static int xfrm6_output_finish(struct sk_buff *skb) 80static int xfrm6_output_finish(struct sk_buff *skb)
101{ 81{
102 struct sk_buff *segs; 82#ifdef CONFIG_NETFILTER
103 83 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
104 if (!skb_is_gso(skb)) 84#endif
105 return xfrm6_output_finish2(skb);
106 85
107 skb->protocol = htons(ETH_P_IPV6); 86 skb->protocol = htons(ETH_P_IPV6);
108 segs = skb_gso_segment(skb, 0); 87 return xfrm_output(skb);
109 kfree_skb(skb);
110 if (unlikely(IS_ERR(segs)))
111 return PTR_ERR(segs);
112
113 do {
114 struct sk_buff *nskb = segs->next;
115 int err;
116
117 segs->next = NULL;
118 err = xfrm6_output_finish2(segs);
119
120 if (unlikely(err)) {
121 while ((segs = nskb)) {
122 nskb = segs->next;
123 segs->next = NULL;
124 kfree_skb(segs);
125 }
126 return err;
127 }
128
129 segs = nskb;
130 } while (segs);
131
132 return 0;
133} 88}
134 89
135int xfrm6_output(struct sk_buff *skb) 90int xfrm6_output(struct sk_buff *skb)
136{ 91{
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, 92 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev,
138 xfrm6_output_finish); 93 xfrm6_output_finish);
139} 94}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 82e27b80d07d..7d20199ee1f3 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,9 +11,11 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/compiler.h> 14#include <linux/err.h>
15#include <linux/kernel.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <net/addrconf.h> 17#include <net/addrconf.h>
18#include <net/dst.h>
17#include <net/xfrm.h> 19#include <net/xfrm.h>
18#include <net/ip.h> 20#include <net/ip.h>
19#include <net/ipv6.h> 21#include <net/ipv6.h>
@@ -25,35 +27,40 @@
25static struct dst_ops xfrm6_dst_ops; 27static struct dst_ops xfrm6_dst_ops;
26static struct xfrm_policy_afinfo xfrm6_policy_afinfo; 28static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
27 29
28static int xfrm6_dst_lookup(struct xfrm_dst **xdst, struct flowi *fl) 30static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
31 xfrm_address_t *daddr)
29{ 32{
30 struct dst_entry *dst = ip6_route_output(NULL, fl); 33 struct flowi fl = {};
31 int err = dst->error; 34 struct dst_entry *dst;
32 if (!err) 35 int err;
33 *xdst = (struct xfrm_dst *) dst; 36
34 else 37 memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst));
38 if (saddr)
39 memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
40
41 dst = ip6_route_output(NULL, &fl);
42
43 err = dst->error;
44 if (dst->error) {
35 dst_release(dst); 45 dst_release(dst);
36 return err; 46 dst = ERR_PTR(err);
47 }
48
49 return dst;
37} 50}
38 51
39static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) 52static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
40{ 53{
41 struct rt6_info *rt; 54 struct dst_entry *dst;
42 struct flowi fl_tunnel = { 55
43 .nl_u = { 56 dst = xfrm6_dst_lookup(0, NULL, daddr);
44 .ip6_u = { 57 if (IS_ERR(dst))
45 .daddr = *(struct in6_addr *)&daddr->a6, 58 return -EHOSTUNREACH;
46 }, 59
47 }, 60 ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6,
48 }; 61 (struct in6_addr *)&saddr->a6);
49 62 dst_release(dst);
50 if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { 63 return 0;
51 ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
52 (struct in6_addr *)&saddr->a6);
53 dst_release(&rt->u.dst);
54 return 0;
55 }
56 return -EHOSTUNREACH;
57} 64}
58 65
59static struct dst_entry * 66static struct dst_entry *
@@ -86,177 +93,53 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
86 return dst; 93 return dst;
87} 94}
88 95
89static inline struct in6_addr* 96static int xfrm6_get_tos(struct flowi *fl)
90__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
91{ 97{
92 return (x->type->remote_addr) ? 98 return 0;
93 (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
94 (struct in6_addr*)&x->id.daddr;
95} 99}
96 100
97static inline struct in6_addr* 101static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
98__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) 102 int nfheader_len)
99{ 103{
100 return (x->type->local_addr) ? 104 if (dst->ops->family == AF_INET6) {
101 (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : 105 struct rt6_info *rt = (struct rt6_info*)dst;
102 (struct in6_addr*)&x->props.saddr; 106 if (rt->rt6i_node)
103} 107 path->path_cookie = rt->rt6i_node->fn_sernum;
108 }
104 109
105static inline void 110 path->u.rt6.rt6i_nfheader_len = nfheader_len;
106__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
107{
108 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
109 *nflen += x->props.header_len;
110 else
111 *len += x->props.header_len;
112}
113 111
114static inline void 112 return 0;
115__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
116{
117 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
118 *nflen -= x->props.header_len;
119 else
120 *len -= x->props.header_len;
121} 113}
122 114
123/* Allocate chain of dst_entry's, attach known xfrm's, calculate 115static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
124 * all the metrics... Shortly, bundle a bundle.
125 */
126
127static int
128__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
129 struct flowi *fl, struct dst_entry **dst_p)
130{ 116{
131 struct dst_entry *dst, *dst_prev; 117 struct rt6_info *rt = (struct rt6_info*)xdst->route;
132 struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
133 struct rt6_info *rt = rt0;
134 struct flowi fl_tunnel = {
135 .nl_u = {
136 .ip6_u = {
137 .saddr = fl->fl6_src,
138 .daddr = fl->fl6_dst,
139 }
140 }
141 };
142 int i;
143 int err = 0;
144 int header_len = 0;
145 int nfheader_len = 0;
146 int trailer_len = 0;
147
148 dst = dst_prev = NULL;
149 dst_hold(&rt->u.dst);
150
151 for (i = 0; i < nx; i++) {
152 struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
153 struct xfrm_dst *xdst;
154
155 if (unlikely(dst1 == NULL)) {
156 err = -ENOBUFS;
157 dst_release(&rt->u.dst);
158 goto error;
159 }
160 118
161 if (!dst) 119 xdst->u.dst.dev = dev;
162 dst = dst1; 120 dev_hold(dev);
163 else {
164 dst_prev->child = dst1;
165 dst1->flags |= DST_NOHASH;
166 dst_clone(dst1);
167 }
168
169 xdst = (struct xfrm_dst *)dst1;
170 xdst->route = &rt->u.dst;
171 xdst->genid = xfrm[i]->genid;
172 if (rt->rt6i_node)
173 xdst->route_cookie = rt->rt6i_node->fn_sernum;
174
175 dst1->next = dst_prev;
176 dst_prev = dst1;
177
178 __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
179 trailer_len += xfrm[i]->props.trailer_len;
180
181 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
182 unsigned short encap_family = xfrm[i]->props.family;
183 switch(encap_family) {
184 case AF_INET:
185 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
186 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
187 break;
188 case AF_INET6:
189 ipv6_addr_copy(&fl_tunnel.fl6_dst, __xfrm6_bundle_addr_remote(xfrm[i], &fl->fl6_dst));
190
191 ipv6_addr_copy(&fl_tunnel.fl6_src, __xfrm6_bundle_addr_local(xfrm[i], &fl->fl6_src));
192 break;
193 default:
194 BUG_ON(1);
195 }
196 121
197 err = xfrm_dst_lookup((struct xfrm_dst **) &rt, 122 xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev);
198 &fl_tunnel, encap_family); 123 if (!xdst->u.rt6.rt6i_idev)
199 if (err) 124 return -ENODEV;
200 goto error;
201 } else
202 dst_hold(&rt->u.dst);
203 }
204 125
205 dst_prev->child = &rt->u.dst; 126 /* Sheit... I remember I did this right. Apparently,
206 dst->path = &rt->u.dst; 127 * it was magically lost, so this code needs audit */
128 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
129 RTF_LOCAL);
130 xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
131 xdst->u.rt6.rt6i_node = rt->rt6i_node;
207 if (rt->rt6i_node) 132 if (rt->rt6i_node)
208 ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; 133 xdst->route_cookie = rt->rt6i_node->fn_sernum;
209 134 xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
210 *dst_p = dst; 135 xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
211 dst = dst_prev; 136 xdst->u.rt6.rt6i_src = rt->rt6i_src;
212
213 dst_prev = *dst_p;
214 i = 0;
215 for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
216 struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
217
218 dst_prev->xfrm = xfrm[i++];
219 dst_prev->dev = rt->u.dst.dev;
220 if (rt->u.dst.dev)
221 dev_hold(rt->u.dst.dev);
222 dst_prev->obsolete = -1;
223 dst_prev->flags |= DST_HOST;
224 dst_prev->lastuse = jiffies;
225 dst_prev->header_len = header_len;
226 dst_prev->nfheader_len = nfheader_len;
227 dst_prev->trailer_len = trailer_len;
228 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
229
230 /* Copy neighbour for reachability confirmation */
231 dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
232 dst_prev->input = rt->u.dst.input;
233 dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
234 /* Sheit... I remember I did this right. Apparently,
235 * it was magically lost, so this code needs audit */
236 x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
237 x->u.rt6.rt6i_metric = rt0->rt6i_metric;
238 x->u.rt6.rt6i_node = rt0->rt6i_node;
239 x->u.rt6.rt6i_gateway = rt0->rt6i_gateway;
240 memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
241 x->u.rt6.rt6i_dst = rt0->rt6i_dst;
242 x->u.rt6.rt6i_src = rt0->rt6i_src;
243 x->u.rt6.rt6i_idev = rt0->rt6i_idev;
244 in6_dev_hold(rt0->rt6i_idev);
245 __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
246 trailer_len -= x->u.dst.xfrm->props.trailer_len;
247 }
248 137
249 xfrm_init_pmtu(dst);
250 return 0; 138 return 0;
251
252error:
253 if (dst)
254 dst_free(dst);
255 return err;
256} 139}
257 140
258static inline void 141static inline void
259_decode_session6(struct sk_buff *skb, struct flowi *fl) 142_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
260{ 143{
261 u16 offset = skb_network_header_len(skb); 144 u16 offset = skb_network_header_len(skb);
262 struct ipv6hdr *hdr = ipv6_hdr(skb); 145 struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -265,8 +148,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
265 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 148 u8 nexthdr = nh[IP6CB(skb)->nhoff];
266 149
267 memset(fl, 0, sizeof(struct flowi)); 150 memset(fl, 0, sizeof(struct flowi));
268 ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); 151 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
269 ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); 152 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
270 153
271 while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { 154 while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
272 nh = skb_network_header(skb); 155 nh = skb_network_header(skb);
@@ -289,8 +172,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
289 if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { 172 if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
290 __be16 *ports = (__be16 *)exthdr; 173 __be16 *ports = (__be16 *)exthdr;
291 174
292 fl->fl_ip_sport = ports[0]; 175 fl->fl_ip_sport = ports[!!reverse];
293 fl->fl_ip_dport = ports[1]; 176 fl->fl_ip_dport = ports[!reverse];
294 } 177 }
295 fl->proto = nexthdr; 178 fl->proto = nexthdr;
296 return; 179 return;
@@ -329,7 +212,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
329 } 212 }
330} 213}
331 214
332static inline int xfrm6_garbage_collect(void) 215static inline int xfrm6_garbage_collect(struct dst_ops *ops)
333{ 216{
334 xfrm6_policy_afinfo.garbage_collect(); 217 xfrm6_policy_afinfo.garbage_collect();
335 return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); 218 return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
@@ -362,7 +245,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
362 245
363 xdst = (struct xfrm_dst *)dst; 246 xdst = (struct xfrm_dst *)dst;
364 if (xdst->u.rt6.rt6i_idev->dev == dev) { 247 if (xdst->u.rt6.rt6i_idev->dev == dev) {
365 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); 248 struct inet6_dev *loopback_idev =
249 in6_dev_get(dev->nd_net->loopback_dev);
366 BUG_ON(!loopback_idev); 250 BUG_ON(!loopback_idev);
367 251
368 do { 252 do {
@@ -385,8 +269,10 @@ static struct dst_ops xfrm6_dst_ops = {
385 .update_pmtu = xfrm6_update_pmtu, 269 .update_pmtu = xfrm6_update_pmtu,
386 .destroy = xfrm6_dst_destroy, 270 .destroy = xfrm6_dst_destroy,
387 .ifdown = xfrm6_dst_ifdown, 271 .ifdown = xfrm6_dst_ifdown,
272 .local_out = __ip6_local_out,
388 .gc_thresh = 1024, 273 .gc_thresh = 1024,
389 .entry_size = sizeof(struct xfrm_dst), 274 .entry_size = sizeof(struct xfrm_dst),
275 .entries = ATOMIC_INIT(0),
390}; 276};
391 277
392static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { 278static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -395,13 +281,15 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
395 .dst_lookup = xfrm6_dst_lookup, 281 .dst_lookup = xfrm6_dst_lookup,
396 .get_saddr = xfrm6_get_saddr, 282 .get_saddr = xfrm6_get_saddr,
397 .find_bundle = __xfrm6_find_bundle, 283 .find_bundle = __xfrm6_find_bundle,
398 .bundle_create = __xfrm6_bundle_create,
399 .decode_session = _decode_session6, 284 .decode_session = _decode_session6,
285 .get_tos = xfrm6_get_tos,
286 .init_path = xfrm6_init_path,
287 .fill_dst = xfrm6_fill_dst,
400}; 288};
401 289
402static void __init xfrm6_policy_init(void) 290static int __init xfrm6_policy_init(void)
403{ 291{
404 xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); 292 return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
405} 293}
406 294
407static void xfrm6_policy_fini(void) 295static void xfrm6_policy_fini(void)
@@ -409,10 +297,22 @@ static void xfrm6_policy_fini(void)
409 xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); 297 xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
410} 298}
411 299
412void __init xfrm6_init(void) 300int __init xfrm6_init(void)
413{ 301{
414 xfrm6_policy_init(); 302 int ret;
415 xfrm6_state_init(); 303
304 ret = xfrm6_policy_init();
305 if (ret)
306 goto out;
307
308 ret = xfrm6_state_init();
309 if (ret)
310 goto out_policy;
311out:
312 return ret;
313out_policy:
314 xfrm6_policy_fini();
315 goto out;
416} 316}
417 317
418void xfrm6_fini(void) 318void xfrm6_fini(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b392bee396f1..dc817e035e23 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -14,6 +14,8 @@
14#include <net/xfrm.h> 14#include <net/xfrm.h>
15#include <linux/pfkeyv2.h> 15#include <linux/pfkeyv2.h>
16#include <linux/ipsec.h> 16#include <linux/ipsec.h>
17#include <linux/netfilter_ipv6.h>
18#include <net/dsfield.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
18#include <net/addrconf.h> 20#include <net/addrconf.h>
19 21
@@ -168,18 +170,37 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
168 return 0; 170 return 0;
169} 171}
170 172
173int xfrm6_extract_header(struct sk_buff *skb)
174{
175 struct ipv6hdr *iph = ipv6_hdr(skb);
176
177 XFRM_MODE_SKB_CB(skb)->id = 0;
178 XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
179 XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
180 XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
181 memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
182 sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
183
184 return 0;
185}
186
171static struct xfrm_state_afinfo xfrm6_state_afinfo = { 187static struct xfrm_state_afinfo xfrm6_state_afinfo = {
172 .family = AF_INET6, 188 .family = AF_INET6,
189 .proto = IPPROTO_IPV6,
190 .eth_proto = htons(ETH_P_IPV6),
173 .owner = THIS_MODULE, 191 .owner = THIS_MODULE,
174 .init_tempsel = __xfrm6_init_tempsel, 192 .init_tempsel = __xfrm6_init_tempsel,
175 .tmpl_sort = __xfrm6_tmpl_sort, 193 .tmpl_sort = __xfrm6_tmpl_sort,
176 .state_sort = __xfrm6_state_sort, 194 .state_sort = __xfrm6_state_sort,
177 .output = xfrm6_output, 195 .output = xfrm6_output,
196 .extract_input = xfrm6_extract_input,
197 .extract_output = xfrm6_extract_output,
198 .transport_finish = xfrm6_transport_finish,
178}; 199};
179 200
180void __init xfrm6_state_init(void) 201int __init xfrm6_state_init(void)
181{ 202{
182 xfrm_state_register_afinfo(&xfrm6_state_afinfo); 203 return xfrm_state_register_afinfo(&xfrm6_state_afinfo);
183} 204}
184 205
185void xfrm6_state_fini(void) 206void xfrm6_state_fini(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index fae90ff31087..639fe8a6ff1e 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -319,7 +319,7 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x)
319 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); 319 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
320} 320}
321 321
322static struct xfrm_type xfrm6_tunnel_type = { 322static const struct xfrm_type xfrm6_tunnel_type = {
323 .description = "IP6IP6", 323 .description = "IP6IP6",
324 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
325 .proto = IPPROTO_IPV6, 325 .proto = IPPROTO_IPV6,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 29b063d43120..c76a9523091b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -92,11 +92,6 @@ extern int ipxrtr_route_skb(struct sk_buff *skb);
92extern struct ipx_route *ipxrtr_lookup(__be32 net); 92extern struct ipx_route *ipxrtr_lookup(__be32 net);
93extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); 93extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
94 94
95#undef IPX_REFCNT_DEBUG
96#ifdef IPX_REFCNT_DEBUG
97atomic_t ipx_sock_nr;
98#endif
99
100struct ipx_interface *ipx_interfaces_head(void) 95struct ipx_interface *ipx_interfaces_head(void)
101{ 96{
102 struct ipx_interface *rc = NULL; 97 struct ipx_interface *rc = NULL;
@@ -151,14 +146,7 @@ static void ipx_destroy_socket(struct sock *sk)
151{ 146{
152 ipx_remove_socket(sk); 147 ipx_remove_socket(sk);
153 skb_queue_purge(&sk->sk_receive_queue); 148 skb_queue_purge(&sk->sk_receive_queue);
154#ifdef IPX_REFCNT_DEBUG 149 sk_refcnt_debug_dec(sk);
155 atomic_dec(&ipx_sock_nr);
156 printk(KERN_DEBUG "IPX socket %p released, %d are still alive\n", sk,
157 atomic_read(&ipx_sock_nr));
158 if (atomic_read(&sk->sk_refcnt) != 1)
159 printk(KERN_DEBUG "Destruction sock ipx %p delayed, cnt=%d\n",
160 sk, atomic_read(&sk->sk_refcnt));
161#endif
162 sock_put(sk); 150 sock_put(sk);
163} 151}
164 152
@@ -1381,14 +1369,11 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol)
1381 goto out; 1369 goto out;
1382 1370
1383 rc = -ENOMEM; 1371 rc = -ENOMEM;
1384 sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); 1372 sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto);
1385 if (!sk) 1373 if (!sk)
1386 goto out; 1374 goto out;
1387#ifdef IPX_REFCNT_DEBUG 1375
1388 atomic_inc(&ipx_sock_nr); 1376 sk_refcnt_debug_inc(sk);
1389 printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk,
1390 atomic_read(&ipx_sock_nr));
1391#endif
1392 sock_init_data(sock, sk); 1377 sock_init_data(sock, sk);
1393 sk->sk_no_check = 1; /* Checksum off by default */ 1378 sk->sk_no_check = 1; /* Checksum off by default */
1394 sock->ops = &ipx_dgram_ops; 1379 sock->ops = &ipx_dgram_ops;
@@ -1409,6 +1394,7 @@ static int ipx_release(struct socket *sock)
1409 1394
1410 sock_set_flag(sk, SOCK_DEAD); 1395 sock_set_flag(sk, SOCK_DEAD);
1411 sock->sk = NULL; 1396 sock->sk = NULL;
1397 sk_refcnt_debug_release(sk);
1412 ipx_destroy_socket(sk); 1398 ipx_destroy_socket(sk);
1413out: 1399out:
1414 return 0; 1400 return 0;
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0cf526450536..92fef864e852 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -28,31 +28,17 @@ static struct ctl_table ipx_table[] = {
28 { 0 }, 28 { 0 },
29}; 29};
30 30
31static struct ctl_table ipx_dir_table[] = { 31static struct ctl_path ipx_path[] = {
32 { 32 { .procname = "net", .ctl_name = CTL_NET, },
33 .ctl_name = NET_IPX, 33 { .procname = "ipx", .ctl_name = NET_IPX, },
34 .procname = "ipx", 34 { }
35 .mode = 0555,
36 .child = ipx_table,
37 },
38 { 0 },
39};
40
41static struct ctl_table ipx_root_table[] = {
42 {
43 .ctl_name = CTL_NET,
44 .procname = "net",
45 .mode = 0555,
46 .child = ipx_dir_table,
47 },
48 { 0 },
49}; 35};
50 36
51static struct ctl_table_header *ipx_table_header; 37static struct ctl_table_header *ipx_table_header;
52 38
53void ipx_register_sysctl(void) 39void ipx_register_sysctl(void)
54{ 40{
55 ipx_table_header = register_sysctl_table(ipx_root_table); 41 ipx_table_header = register_sysctl_paths(ipx_path, ipx_table);
56} 42}
57 43
58void ipx_unregister_sysctl(void) 44void ipx_unregister_sysctl(void)
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0328ae2654f4..240b0cbfb532 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -802,12 +802,18 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
802 } 802 }
803#endif /* CONFIG_IRDA_ULTRA */ 803#endif /* CONFIG_IRDA_ULTRA */
804 804
805 self->ias_obj = irias_new_object(addr->sir_name, jiffies);
806 if (self->ias_obj == NULL)
807 return -ENOMEM;
808
805 err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); 809 err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
806 if (err < 0) 810 if (err < 0) {
811 kfree(self->ias_obj->name);
812 kfree(self->ias_obj);
807 return err; 813 return err;
814 }
808 815
809 /* Register with LM-IAS */ 816 /* Register with LM-IAS */
810 self->ias_obj = irias_new_object(addr->sir_name, jiffies);
811 irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel", 817 irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel",
812 self->stsap_sel, IAS_KERNEL_ATTR); 818 self->stsap_sel, IAS_KERNEL_ATTR);
813 irias_insert_object(self->ias_obj); 819 irias_insert_object(self->ias_obj);
@@ -1078,7 +1084,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
1078 } 1084 }
1079 1085
1080 /* Allocate networking socket */ 1086 /* Allocate networking socket */
1081 sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); 1087 sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto);
1082 if (sk == NULL) 1088 if (sk == NULL)
1083 return -ENOMEM; 1089 return -ENOMEM;
1084 1090
@@ -1118,8 +1124,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
1118 self->max_sdu_size_rx = TTP_SAR_UNBOUND; 1124 self->max_sdu_size_rx = TTP_SAR_UNBOUND;
1119 break; 1125 break;
1120 default: 1126 default:
1121 IRDA_ERROR("%s: protocol not supported!\n",
1122 __FUNCTION__);
1123 return -ESOCKTNOSUPPORT; 1127 return -ESOCKTNOSUPPORT;
1124 } 1128 }
1125 break; 1129 break;
@@ -1827,7 +1831,7 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
1827 struct irda_ias_set *ias_opt; 1831 struct irda_ias_set *ias_opt;
1828 struct ias_object *ias_obj; 1832 struct ias_object *ias_obj;
1829 struct ias_attrib * ias_attr; /* Attribute in IAS object */ 1833 struct ias_attrib * ias_attr; /* Attribute in IAS object */
1830 int opt; 1834 int opt, free_ias = 0;
1831 1835
1832 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self); 1836 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
1833 1837
@@ -1883,11 +1887,20 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
1883 /* Create a new object */ 1887 /* Create a new object */
1884 ias_obj = irias_new_object(ias_opt->irda_class_name, 1888 ias_obj = irias_new_object(ias_opt->irda_class_name,
1885 jiffies); 1889 jiffies);
1890 if (ias_obj == NULL) {
1891 kfree(ias_opt);
1892 return -ENOMEM;
1893 }
1894 free_ias = 1;
1886 } 1895 }
1887 1896
1888 /* Do we have the attribute already ? */ 1897 /* Do we have the attribute already ? */
1889 if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) { 1898 if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) {
1890 kfree(ias_opt); 1899 kfree(ias_opt);
1900 if (free_ias) {
1901 kfree(ias_obj->name);
1902 kfree(ias_obj);
1903 }
1891 return -EINVAL; 1904 return -EINVAL;
1892 } 1905 }
1893 1906
@@ -1906,6 +1919,11 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
1906 if(ias_opt->attribute.irda_attrib_octet_seq.len > 1919 if(ias_opt->attribute.irda_attrib_octet_seq.len >
1907 IAS_MAX_OCTET_STRING) { 1920 IAS_MAX_OCTET_STRING) {
1908 kfree(ias_opt); 1921 kfree(ias_opt);
1922 if (free_ias) {
1923 kfree(ias_obj->name);
1924 kfree(ias_obj);
1925 }
1926
1909 return -EINVAL; 1927 return -EINVAL;
1910 } 1928 }
1911 /* Add an octet sequence attribute */ 1929 /* Add an octet sequence attribute */
@@ -1934,6 +1952,10 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
1934 break; 1952 break;
1935 default : 1953 default :
1936 kfree(ias_opt); 1954 kfree(ias_opt);
1955 if (free_ias) {
1956 kfree(ias_obj->name);
1957 kfree(ias_obj);
1958 }
1937 return -EINVAL; 1959 return -EINVAL;
1938 } 1960 }
1939 irias_insert_object(ias_obj); 1961 irias_insert_object(ias_obj);
@@ -2388,9 +2410,8 @@ bed:
2388 2410
2389 /* Set watchdog timer to expire in <val> ms. */ 2411 /* Set watchdog timer to expire in <val> ms. */
2390 self->errno = 0; 2412 self->errno = 0;
2391 init_timer(&self->watchdog); 2413 setup_timer(&self->watchdog, irda_discovery_timeout,
2392 self->watchdog.function = irda_discovery_timeout; 2414 (unsigned long)self);
2393 self->watchdog.data = (unsigned long) self;
2394 self->watchdog.expires = jiffies + (val * HZ/1000); 2415 self->watchdog.expires = jiffies + (val * HZ/1000);
2395 add_timer(&(self->watchdog)); 2416 add_timer(&(self->watchdog));
2396 2417
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 2d63fa8e1556..b825399fc160 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -363,6 +363,18 @@ void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
363 clen = skb->data[0]; 363 clen = skb->data[0];
364 364
365 /* 365 /*
366 * Input validation check: a stir4200/mcp2150 combinations sometimes
367 * results in frames with clen > remaining packet size. These are
368 * illegal; if we throw away just this frame then it seems to carry on
369 * fine
370 */
371 if (unlikely(skb->len < (clen + 1))) {
372 IRDA_DEBUG(2, "%s() throwing away illegal frame\n",
373 __FUNCTION__ );
374 return;
375 }
376
377 /*
366 * If there are any data hiding in the control channel, we must 378 * If there are any data hiding in the control channel, we must
367 * deliver it first. The side effect is that the control channel 379 * deliver it first. The side effect is that the control channel
368 * will be removed from the skb 380 * will be removed from the skb
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index e5e4792a0314..598dcbe4a501 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -496,7 +496,7 @@ static int ircomm_param_poll(void *instance, irda_param_t *param, int get)
496 IRDA_ASSERT(self != NULL, return -1;); 496 IRDA_ASSERT(self != NULL, return -1;);
497 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); 497 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
498 498
499 /* Poll parameters are always of lenght 0 (just a signal) */ 499 /* Poll parameters are always of length 0 (just a signal) */
500 if (!get) { 500 if (!get) {
501 /* Respond with DTE line settings */ 501 /* Respond with DTE line settings */
502 ircomm_param_request(self, IRCOMM_DTE, TRUE); 502 ircomm_param_request(self, IRCOMM_DTE, TRUE);
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 1120b150e211..be627e1f04d8 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -1245,6 +1245,7 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
1245 self->flow = cmd; 1245 self->flow = cmd;
1246} 1246}
1247 1247
1248#ifdef CONFIG_PROC_FS
1248static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) 1249static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf)
1249{ 1250{
1250 int ret=0; 1251 int ret=0;
@@ -1354,7 +1355,6 @@ static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf)
1354 * 1355 *
1355 * 1356 *
1356 */ 1357 */
1357#ifdef CONFIG_PROC_FS
1358static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, 1358static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
1359 int *eof, void *unused) 1359 int *eof, void *unused)
1360{ 1360{
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 435b563d29a6..87185910d0ee 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -57,20 +57,6 @@ static void __irda_task_delete(struct irda_task *task);
57static hashbin_t *dongles = NULL; 57static hashbin_t *dongles = NULL;
58static hashbin_t *tasks = NULL; 58static hashbin_t *tasks = NULL;
59 59
60#ifdef CONFIG_IRDA_DEBUG
61static const char *task_state[] = {
62 "IRDA_TASK_INIT",
63 "IRDA_TASK_DONE",
64 "IRDA_TASK_WAIT",
65 "IRDA_TASK_WAIT1",
66 "IRDA_TASK_WAIT2",
67 "IRDA_TASK_WAIT3",
68 "IRDA_TASK_CHILD_INIT",
69 "IRDA_TASK_CHILD_WAIT",
70 "IRDA_TASK_CHILD_DONE",
71};
72#endif /* CONFIG_IRDA_DEBUG */
73
74static void irda_task_timer_expired(void *data); 60static void irda_task_timer_expired(void *data);
75 61
76int __init irda_device_init( void) 62int __init irda_device_init( void)
@@ -176,14 +162,6 @@ int irda_device_is_receiving(struct net_device *dev)
176 return req.ifr_receiving; 162 return req.ifr_receiving;
177} 163}
178 164
179void irda_task_next_state(struct irda_task *task, IRDA_TASK_STATE state)
180{
181 IRDA_DEBUG(2, "%s(), state = %s\n", __FUNCTION__, task_state[state]);
182
183 task->state = state;
184}
185EXPORT_SYMBOL(irda_task_next_state);
186
187static void __irda_task_delete(struct irda_task *task) 165static void __irda_task_delete(struct irda_task *task)
188{ 166{
189 del_timer(&task->timer); 167 del_timer(&task->timer);
@@ -191,14 +169,13 @@ static void __irda_task_delete(struct irda_task *task)
191 kfree(task); 169 kfree(task);
192} 170}
193 171
194void irda_task_delete(struct irda_task *task) 172static void irda_task_delete(struct irda_task *task)
195{ 173{
196 /* Unregister task */ 174 /* Unregister task */
197 hashbin_remove(tasks, (long) task, NULL); 175 hashbin_remove(tasks, (long) task, NULL);
198 176
199 __irda_task_delete(task); 177 __irda_task_delete(task);
200} 178}
201EXPORT_SYMBOL(irda_task_delete);
202 179
203/* 180/*
204 * Function irda_task_kick (task) 181 * Function irda_task_kick (task)
@@ -272,51 +249,6 @@ static int irda_task_kick(struct irda_task *task)
272} 249}
273 250
274/* 251/*
275 * Function irda_task_execute (instance, function, finished)
276 *
277 * This function registers and tries to execute tasks that may take some
278 * time to complete. We do it this hairy way since we may have been
279 * called from interrupt context, so it's not possible to use
280 * schedule_timeout()
281 * Two important notes :
282 * o Make sure you irda_task_delete(task); in case you delete the
283 * calling instance.
284 * o No real need to lock when calling this function, but you may
285 * want to lock within the task handler.
286 * Jean II
287 */
288struct irda_task *irda_task_execute(void *instance,
289 IRDA_TASK_CALLBACK function,
290 IRDA_TASK_CALLBACK finished,
291 struct irda_task *parent, void *param)
292{
293 struct irda_task *task;
294
295 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
296
297 task = kmalloc(sizeof(struct irda_task), GFP_ATOMIC);
298 if (!task)
299 return NULL;
300
301 task->state = IRDA_TASK_INIT;
302 task->instance = instance;
303 task->function = function;
304 task->finished = finished;
305 task->parent = parent;
306 task->param = param;
307 task->magic = IRDA_TASK_MAGIC;
308
309 init_timer(&task->timer);
310
311 /* Register task */
312 hashbin_insert(tasks, (irda_queue_t *) task, (long) task, NULL);
313
314 /* No time to waste, so lets get going! */
315 return irda_task_kick(task) ? NULL : task;
316}
317EXPORT_SYMBOL(irda_task_execute);
318
319/*
320 * Function irda_task_timer_expired (data) 252 * Function irda_task_timer_expired (data)
321 * 253 *
322 * Task time has expired. We now try to execute task (again), and restart 254 * Task time has expired. We now try to execute task (again), and restart
@@ -364,105 +296,6 @@ struct net_device *alloc_irdadev(int sizeof_priv)
364} 296}
365EXPORT_SYMBOL(alloc_irdadev); 297EXPORT_SYMBOL(alloc_irdadev);
366 298
367/*
368 * Function irda_device_init_dongle (self, type, qos)
369 *
370 * Initialize attached dongle.
371 *
372 * Important : request_module require us to call this function with
373 * a process context and irq enabled. - Jean II
374 */
375dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
376{
377 struct dongle_reg *reg;
378 dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
379
380 might_sleep();
381
382 spin_lock(&dongles->hb_spinlock);
383 reg = hashbin_find(dongles, type, NULL);
384
385#ifdef CONFIG_KMOD
386 /* Try to load the module needed */
387 if (!reg && capable(CAP_SYS_MODULE)) {
388 spin_unlock(&dongles->hb_spinlock);
389
390 request_module("irda-dongle-%d", type);
391
392 spin_lock(&dongles->hb_spinlock);
393 reg = hashbin_find(dongles, type, NULL);
394 }
395#endif
396
397 if (!reg || !try_module_get(reg->owner) ) {
398 IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
399 type);
400 kfree(dongle);
401 dongle = NULL;
402 }
403 if (dongle) {
404 /* Bind the registration info to this particular instance */
405 dongle->issue = reg;
406 dongle->dev = dev;
407 }
408 spin_unlock(&dongles->hb_spinlock);
409 return dongle;
410}
411EXPORT_SYMBOL(irda_device_dongle_init);
412
413/*
414 * Function irda_device_dongle_cleanup (dongle)
415 */
416int irda_device_dongle_cleanup(dongle_t *dongle)
417{
418 IRDA_ASSERT(dongle != NULL, return -1;);
419
420 dongle->issue->close(dongle);
421 module_put(dongle->issue->owner);
422 kfree(dongle);
423
424 return 0;
425}
426EXPORT_SYMBOL(irda_device_dongle_cleanup);
427
428/*
429 * Function irda_device_register_dongle (dongle)
430 */
431int irda_device_register_dongle(struct dongle_reg *new)
432{
433 spin_lock(&dongles->hb_spinlock);
434 /* Check if this dongle has been registered before */
435 if (hashbin_find(dongles, new->type, NULL)) {
436 IRDA_MESSAGE("%s: Dongle type %x already registered\n",
437 __FUNCTION__, new->type);
438 } else {
439 /* Insert IrDA dongle into hashbin */
440 hashbin_insert(dongles, (irda_queue_t *) new, new->type, NULL);
441 }
442 spin_unlock(&dongles->hb_spinlock);
443
444 return 0;
445}
446EXPORT_SYMBOL(irda_device_register_dongle);
447
448/*
449 * Function irda_device_unregister_dongle (dongle)
450 *
451 * Unregister dongle, and remove dongle from list of registered dongles
452 *
453 */
454void irda_device_unregister_dongle(struct dongle_reg *dongle)
455{
456 struct dongle *node;
457
458 spin_lock(&dongles->hb_spinlock);
459 node = hashbin_remove(dongles, dongle->type, NULL);
460 if (!node)
461 IRDA_ERROR("%s: dongle not found!\n", __FUNCTION__);
462 spin_unlock(&dongles->hb_spinlock);
463}
464EXPORT_SYMBOL(irda_device_unregister_dongle);
465
466#ifdef CONFIG_ISA_DMA_API 299#ifdef CONFIG_ISA_DMA_API
467/* 300/*
468 * Function setup_dma (idev, buffer, count, mode) 301 * Function setup_dma (idev, buffer, count, mode)
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index dc5e34a01620..390a790886eb 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -579,7 +579,7 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self,
579 fp[n++] = ret_code; 579 fp[n++] = ret_code;
580 580
581 /* Insert list length (MSB first) */ 581 /* Insert list length (MSB first) */
582 tmp_be16 = __constant_htons(0x0001); 582 tmp_be16 = htons(0x0001);
583 memcpy(fp+n, &tmp_be16, 2); n += 2; 583 memcpy(fp+n, &tmp_be16, 2); n += 2;
584 584
585 /* Insert object identifier ( MSB first) */ 585 /* Insert object identifier ( MSB first) */
@@ -928,7 +928,7 @@ void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb)
928 928
929 opcode = fp[0]; 929 opcode = fp[0];
930 if (~opcode & 0x80) { 930 if (~opcode & 0x80) {
931 IRDA_WARNING("%s: IrIAS multiframe commands or results" 931 IRDA_WARNING("%s: IrIAS multiframe commands or results "
932 "is not implemented yet!\n", __FUNCTION__); 932 "is not implemented yet!\n", __FUNCTION__);
933 return; 933 return;
934 } 934 }
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 7f9c8542e5fc..1ab91f787cc1 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -296,6 +296,7 @@ void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
296 */ 296 */
297void irlan_eth_send_gratuitous_arp(struct net_device *dev) 297void irlan_eth_send_gratuitous_arp(struct net_device *dev)
298{ 298{
299#ifdef CONFIG_INET
299 struct in_device *in_dev; 300 struct in_device *in_dev;
300 301
301 /* 302 /*
@@ -303,7 +304,6 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev)
303 * is useful if we have changed access points on the same 304 * is useful if we have changed access points on the same
304 * subnet. 305 * subnet.
305 */ 306 */
306#ifdef CONFIG_INET
307 IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); 307 IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n");
308 rcu_read_lock(); 308 rcu_read_lock();
309 in_dev = __in_dev_get_rcu(dev); 309 in_dev = __in_dev_get_rcu(dev);
@@ -342,7 +342,7 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
342 342
343 if (dev->flags & IFF_PROMISC) { 343 if (dev->flags & IFF_PROMISC) {
344 /* Enable promiscuous mode */ 344 /* Enable promiscuous mode */
345 IRDA_WARNING("Promiscous mode not implemented by IrLAN!\n"); 345 IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n");
346 } 346 }
347 else if ((dev->flags & IFF_ALLMULTI) || dev->mc_count > HW_MAX_ADDRS) { 347 else if ((dev->flags & IFF_ALLMULTI) || dev->mc_count > HW_MAX_ADDRS) {
348 /* Disable promiscuous mode, use normal mode. */ 348 /* Disable promiscuous mode, use normal mode. */
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 4c33bf5c8354..6af86eba7463 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -1199,6 +1199,19 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
1199 1199
1200 switch (event) { 1200 switch (event) {
1201 case RECV_I_RSP: /* Optimize for the common case */ 1201 case RECV_I_RSP: /* Optimize for the common case */
1202 if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) {
1203 /*
1204 * Input validation check: a stir4200/mcp2150
1205 * combination sometimes results in an empty i:rsp.
1206 * This makes no sense; we can just ignore the frame
1207 * and send an rr:cmd immediately. This happens before
1208 * changing nr or ns so triggers a retransmit
1209 */
1210 irlap_wait_min_turn_around(self, &self->qos_tx);
1211 irlap_send_rr_frame(self, CMD_FRAME);
1212 /* Keep state */
1213 break;
1214 }
1202 /* FIXME: must check for remote_busy below */ 1215 /* FIXME: must check for remote_busy below */
1203#ifdef CONFIG_IRDA_FAST_RR 1216#ifdef CONFIG_IRDA_FAST_RR
1204 /* 1217 /*
@@ -1514,9 +1527,15 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
1514 1527
1515 /* N2 is the disconnect timer. Until we reach it, we retry */ 1528 /* N2 is the disconnect timer. Until we reach it, we retry */
1516 if (self->retry_count < self->N2) { 1529 if (self->retry_count < self->N2) {
1517 /* Retry sending the pf bit to the secondary */ 1530 if (skb_peek(&self->wx_list) == NULL) {
1518 irlap_wait_min_turn_around(self, &self->qos_tx); 1531 /* Retry sending the pf bit to the secondary */
1519 irlap_send_rr_frame(self, CMD_FRAME); 1532 IRDA_DEBUG(4, "nrm_p: resending rr");
1533 irlap_wait_min_turn_around(self, &self->qos_tx);
1534 irlap_send_rr_frame(self, CMD_FRAME);
1535 } else {
1536 IRDA_DEBUG(4, "nrm_p: resend frames");
1537 irlap_resend_rejected_frames(self, CMD_FRAME);
1538 }
1520 1539
1521 irlap_start_final_timer(self, self->final_timeout); 1540 irlap_start_final_timer(self, self->final_timeout);
1522 self->retry_count++; 1541 self->retry_count++;
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 4f3764546b2f..7c132d6342af 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -144,7 +144,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
144 frame->control = SNRM_CMD | PF_BIT; 144 frame->control = SNRM_CMD | PF_BIT;
145 145
146 /* 146 /*
147 * If we are establishing a connection then insert QoS paramerters 147 * If we are establishing a connection then insert QoS parameters
148 */ 148 */
149 if (qos) { 149 if (qos) {
150 skb_put(tx_skb, 9); /* 25 left */ 150 skb_put(tx_skb, 9); /* 25 left */
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index cedff8068fbc..135ac6907bbf 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -103,9 +103,12 @@ int __init irlmp_init(void)
103 irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */ 103 irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
104 strcpy(sysctl_devname, "Linux"); 104 strcpy(sysctl_devname, "Linux");
105 105
106 /* Do discovery every 3 seconds */
107 init_timer(&irlmp->discovery_timer); 106 init_timer(&irlmp->discovery_timer);
108 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); 107
108 /* Do discovery every 3 seconds, conditionaly */
109 if (sysctl_discovery)
110 irlmp_start_discovery_timer(irlmp,
111 sysctl_discovery_timeout*HZ);
109 112
110 return 0; 113 return 0;
111} 114}
@@ -353,6 +356,7 @@ void irlmp_unregister_link(__u32 saddr)
353 /* Final cleanup */ 356 /* Final cleanup */
354 del_timer(&link->idle_timer); 357 del_timer(&link->idle_timer);
355 link->magic = 0; 358 link->magic = 0;
359 hashbin_delete(link->lsaps, (FREE_FUNC) __irlmp_close_lsap);
356 kfree(link); 360 kfree(link);
357 } 361 }
358} 362}
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index 1bba87e78609..150cd3f1129a 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -174,9 +174,7 @@ void irlmp_discovery_timer_expired(void *data)
174 /* We always cleanup the log (active & passive discovery) */ 174 /* We always cleanup the log (active & passive discovery) */
175 irlmp_do_expiry(); 175 irlmp_do_expiry();
176 176
177 /* Active discovery is conditional */ 177 irlmp_do_discovery(sysctl_discovery_slots);
178 if (sysctl_discovery)
179 irlmp_do_discovery(sysctl_discovery_slots);
180 178
181 /* Restart timer */ 179 /* Restart timer */
182 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ); 180 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 2f9f8dce5a69..e0eab5927c4f 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -731,15 +731,25 @@ dev_irnet_ioctl(struct inode * inode,
731 /* Get termios */ 731 /* Get termios */
732 case TCGETS: 732 case TCGETS:
733 DEBUG(FS_INFO, "Get termios.\n"); 733 DEBUG(FS_INFO, "Get termios.\n");
734#ifndef TCGETS2
734 if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) 735 if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
735 break; 736 break;
737#else
738 if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
739 break;
740#endif
736 err = 0; 741 err = 0;
737 break; 742 break;
738 /* Set termios */ 743 /* Set termios */
739 case TCSETSF: 744 case TCSETSF:
740 DEBUG(FS_INFO, "Set termios.\n"); 745 DEBUG(FS_INFO, "Set termios.\n");
746#ifndef TCGETS2
741 if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) 747 if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
742 break; 748 break;
749#else
750 if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
751 break;
752#endif
743 err = 0; 753 err = 0;
744 break; 754 break;
745 755
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 565cbf0421cd..9ab3df15425d 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -29,6 +29,8 @@
29#include <linux/init.h> 29#include <linux/init.h>
30 30
31#include <net/irda/irda.h> /* irda_debug */ 31#include <net/irda/irda.h> /* irda_debug */
32#include <net/irda/irlmp.h>
33#include <net/irda/timer.h>
32#include <net/irda/irias_object.h> 34#include <net/irda/irias_object.h>
33 35
34extern int sysctl_discovery; 36extern int sysctl_discovery;
@@ -45,6 +47,8 @@ extern int sysctl_max_noreply_time;
45extern int sysctl_warn_noreply_time; 47extern int sysctl_warn_noreply_time;
46extern int sysctl_lap_keepalive_time; 48extern int sysctl_lap_keepalive_time;
47 49
50extern struct irlmp_cb *irlmp;
51
48/* this is needed for the proc_dointvec_minmax - Jean II */ 52/* this is needed for the proc_dointvec_minmax - Jean II */
49static int max_discovery_slots = 16; /* ??? */ 53static int max_discovery_slots = 16; /* ??? */
50static int min_discovery_slots = 1; 54static int min_discovery_slots = 1;
@@ -85,6 +89,27 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
85 return ret; 89 return ret;
86} 90}
87 91
92
93static int do_discovery(ctl_table *table, int write, struct file *filp,
94 void __user *buffer, size_t *lenp, loff_t *ppos)
95{
96 int ret;
97
98 ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
99 if (ret)
100 return ret;
101
102 if (irlmp == NULL)
103 return -ENODEV;
104
105 if (sysctl_discovery)
106 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
107 else
108 del_timer_sync(&irlmp->discovery_timer);
109
110 return ret;
111}
112
88/* One file */ 113/* One file */
89static ctl_table irda_table[] = { 114static ctl_table irda_table[] = {
90 { 115 {
@@ -93,7 +118,8 @@ static ctl_table irda_table[] = {
93 .data = &sysctl_discovery, 118 .data = &sysctl_discovery,
94 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
95 .mode = 0644, 120 .mode = 0644,
96 .proc_handler = &proc_dointvec 121 .proc_handler = &do_discovery,
122 .strategy = &sysctl_intvec
97 }, 123 },
98 { 124 {
99 .ctl_name = NET_IRDA_DEVNAME, 125 .ctl_name = NET_IRDA_DEVNAME,
@@ -234,28 +260,10 @@ static ctl_table irda_table[] = {
234 { .ctl_name = 0 } 260 { .ctl_name = 0 }
235}; 261};
236 262
237/* One directory */ 263static struct ctl_path irda_path[] = {
238static ctl_table irda_net_table[] = { 264 { .procname = "net", .ctl_name = CTL_NET, },
239 { 265 { .procname = "irda", .ctl_name = NET_IRDA, },
240 .ctl_name = NET_IRDA, 266 { }
241 .procname = "irda",
242 .maxlen = 0,
243 .mode = 0555,
244 .child = irda_table
245 },
246 { .ctl_name = 0 }
247};
248
249/* The parent directory */
250static ctl_table irda_root_table[] = {
251 {
252 .ctl_name = CTL_NET,
253 .procname = "net",
254 .maxlen = 0,
255 .mode = 0555,
256 .child = irda_net_table
257 },
258 { .ctl_name = 0 }
259}; 267};
260 268
261static struct ctl_table_header *irda_table_header; 269static struct ctl_table_header *irda_table_header;
@@ -268,7 +276,7 @@ static struct ctl_table_header *irda_table_header;
268 */ 276 */
269int __init irda_sysctl_register(void) 277int __init irda_sysctl_register(void)
270{ 278{
271 irda_table_header = register_sysctl_table(irda_root_table); 279 irda_table_header = register_sysctl_paths(irda_path, irda_table);
272 if (!irda_table_header) 280 if (!irda_table_header)
273 return -ENOMEM; 281 return -ENOMEM;
274 282
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 2627dad7cd87..722bbe044d9c 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -133,7 +133,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
133 int err; 133 int err;
134 134
135 p.pi = pi; /* In case handler needs to know */ 135 p.pi = pi; /* In case handler needs to know */
136 p.pl = type & PV_MASK; /* The integer type codes the lenght as well */ 136 p.pl = type & PV_MASK; /* The integer type codes the length as well */
137 p.pv.i = 0; /* Clear value */ 137 p.pv.i = 0; /* Clear value */
138 138
139 /* Call handler for this parameter */ 139 /* Call handler for this parameter */
@@ -142,7 +142,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
142 return err; 142 return err;
143 143
144 /* 144 /*
145 * If parameter lenght is still 0, then (1) this is an any length 145 * If parameter length is still 0, then (1) this is an any length
146 * integer, and (2) the handler function does not care which length 146 * integer, and (2) the handler function does not care which length
147 * we choose to use, so we pick the one the gives the fewest bytes. 147 * we choose to use, so we pick the one the gives the fewest bytes.
148 */ 148 */
@@ -206,11 +206,11 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
206{ 206{
207 irda_param_t p; 207 irda_param_t p;
208 int n = 0; 208 int n = 0;
209 int extract_len; /* Real lenght we extract */ 209 int extract_len; /* Real length we extract */
210 int err; 210 int err;
211 211
212 p.pi = pi; /* In case handler needs to know */ 212 p.pi = pi; /* In case handler needs to know */
213 p.pl = buf[1]; /* Extract lenght of value */ 213 p.pl = buf[1]; /* Extract length of value */
214 p.pv.i = 0; /* Clear value */ 214 p.pv.i = 0; /* Clear value */
215 extract_len = p.pl; /* Default : extract all */ 215 extract_len = p.pl; /* Default : extract all */
216 216
@@ -297,7 +297,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
297 IRDA_DEBUG(2, "%s()\n", __FUNCTION__); 297 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
298 298
299 p.pi = pi; /* In case handler needs to know */ 299 p.pi = pi; /* In case handler needs to know */
300 p.pl = buf[1]; /* Extract lenght of value */ 300 p.pl = buf[1]; /* Extract length of value */
301 301
302 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __FUNCTION__, 302 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __FUNCTION__,
303 p.pi, p.pl); 303 p.pi, p.pl);
@@ -339,7 +339,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
339 irda_param_t p; 339 irda_param_t p;
340 340
341 p.pi = pi; /* In case handler needs to know */ 341 p.pi = pi; /* In case handler needs to know */
342 p.pl = buf[1]; /* Extract lenght of value */ 342 p.pl = buf[1]; /* Extract length of value */
343 343
344 /* Check if buffer is long enough for parsing */ 344 /* Check if buffer is long enough for parsing */
345 if (len < (2+p.pl)) { 345 if (len < (2+p.pl)) {
@@ -463,7 +463,7 @@ int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
463 int n = 0; 463 int n = 0;
464 464
465 IRDA_ASSERT(buf != NULL, return ret;); 465 IRDA_ASSERT(buf != NULL, return ret;);
466 IRDA_ASSERT(info != 0, return ret;); 466 IRDA_ASSERT(info != NULL, return ret;);
467 467
468 pi_minor = pi & info->pi_mask; 468 pi_minor = pi & info->pi_mask;
469 pi_major = pi >> info->pi_major_offset; 469 pi_major = pi >> info->pi_major_offset;
@@ -517,7 +517,7 @@ static int irda_param_extract(void *self, __u8 *buf, int len,
517 int n = 0; 517 int n = 0;
518 518
519 IRDA_ASSERT(buf != NULL, return ret;); 519 IRDA_ASSERT(buf != NULL, return ret;);
520 IRDA_ASSERT(info != 0, return ret;); 520 IRDA_ASSERT(info != NULL, return ret;);
521 521
522 pi_minor = buf[n] & info->pi_mask; 522 pi_minor = buf[n] & info->pi_mask;
523 pi_major = buf[n] >> info->pi_major_offset; 523 pi_major = buf[n] >> info->pi_major_offset;
@@ -570,7 +570,7 @@ int irda_param_extract_all(void *self, __u8 *buf, int len,
570 int n = 0; 570 int n = 0;
571 571
572 IRDA_ASSERT(buf != NULL, return ret;); 572 IRDA_ASSERT(buf != NULL, return ret;);
573 IRDA_ASSERT(info != 0, return ret;); 573 IRDA_ASSERT(info != NULL, return ret;);
574 574
575 /* 575 /*
576 * Parse all parameters. Each parameter must be at least two bytes 576 * Parse all parameters. Each parameter must be at least two bytes
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index e71286768a48..c246983308b8 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -238,7 +238,7 @@ async_bump(struct net_device *dev,
238 skb_reserve(newskb, 1); 238 skb_reserve(newskb, 1);
239 239
240 if(docopy) { 240 if(docopy) {
241 /* Copy data without CRC (lenght already checked) */ 241 /* Copy data without CRC (length already checked) */
242 skb_copy_to_linear_data(newskb, rx_buff->data, 242 skb_copy_to_linear_data(newskb, rx_buff->data,
243 rx_buff->len - 2); 243 rx_buff->len - 2);
244 /* Deliver this skb */ 244 /* Deliver this skb */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 43e01c8d382b..2255e3c082ed 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -94,13 +94,6 @@ static void iucv_sock_clear_timer(struct sock *sk)
94 sk_stop_timer(sk, &sk->sk_timer); 94 sk_stop_timer(sk, &sk->sk_timer);
95} 95}
96 96
97static void iucv_sock_init_timer(struct sock *sk)
98{
99 init_timer(&sk->sk_timer);
100 sk->sk_timer.function = iucv_sock_timeout;
101 sk->sk_timer.data = (unsigned long)sk;
102}
103
104static struct sock *__iucv_get_sock_by_name(char *nm) 97static struct sock *__iucv_get_sock_by_name(char *nm)
105{ 98{
106 struct sock *sk; 99 struct sock *sk;
@@ -216,7 +209,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
216{ 209{
217 struct sock *sk; 210 struct sock *sk;
218 211
219 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, 1); 212 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
220 if (!sk) 213 if (!sk)
221 return NULL; 214 return NULL;
222 215
@@ -238,7 +231,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
238 sk->sk_protocol = proto; 231 sk->sk_protocol = proto;
239 sk->sk_state = IUCV_OPEN; 232 sk->sk_state = IUCV_OPEN;
240 233
241 iucv_sock_init_timer(sk); 234 setup_timer(&sk->sk_timer, iucv_sock_timeout, (unsigned long)sk);
242 235
243 iucv_sock_link(&iucv_sk_list, sk); 236 iucv_sock_link(&iucv_sk_list, sk);
244 return sk; 237 return sk;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a2f5a6ea3895..f13fe8821cbd 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -97,7 +97,7 @@ struct iucv_irq_list {
97 struct iucv_irq_data data; 97 struct iucv_irq_data data;
98}; 98};
99 99
100static struct iucv_irq_data *iucv_irq_data; 100static struct iucv_irq_data *iucv_irq_data[NR_CPUS];
101static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; 101static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
102static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; 102static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
103 103
@@ -277,7 +277,7 @@ union iucv_param {
277/* 277/*
278 * Anchor for per-cpu IUCV command parameter block. 278 * Anchor for per-cpu IUCV command parameter block.
279 */ 279 */
280static union iucv_param *iucv_param; 280static union iucv_param *iucv_param[NR_CPUS];
281 281
282/** 282/**
283 * iucv_call_b2f0 283 * iucv_call_b2f0
@@ -356,7 +356,7 @@ static void iucv_allow_cpu(void *data)
356 * 0x10 - Flag to allow priority message completion interrupts 356 * 0x10 - Flag to allow priority message completion interrupts
357 * 0x08 - Flag to allow IUCV control interrupts 357 * 0x08 - Flag to allow IUCV control interrupts
358 */ 358 */
359 parm = percpu_ptr(iucv_param, smp_processor_id()); 359 parm = iucv_param[cpu];
360 memset(parm, 0, sizeof(union iucv_param)); 360 memset(parm, 0, sizeof(union iucv_param));
361 parm->set_mask.ipmask = 0xf8; 361 parm->set_mask.ipmask = 0xf8;
362 iucv_call_b2f0(IUCV_SETMASK, parm); 362 iucv_call_b2f0(IUCV_SETMASK, parm);
@@ -377,7 +377,7 @@ static void iucv_block_cpu(void *data)
377 union iucv_param *parm; 377 union iucv_param *parm;
378 378
379 /* Disable all iucv interrupts. */ 379 /* Disable all iucv interrupts. */
380 parm = percpu_ptr(iucv_param, smp_processor_id()); 380 parm = iucv_param[cpu];
381 memset(parm, 0, sizeof(union iucv_param)); 381 memset(parm, 0, sizeof(union iucv_param));
382 iucv_call_b2f0(IUCV_SETMASK, parm); 382 iucv_call_b2f0(IUCV_SETMASK, parm);
383 383
@@ -401,9 +401,9 @@ static void iucv_declare_cpu(void *data)
401 return; 401 return;
402 402
403 /* Declare interrupt buffer. */ 403 /* Declare interrupt buffer. */
404 parm = percpu_ptr(iucv_param, cpu); 404 parm = iucv_param[cpu];
405 memset(parm, 0, sizeof(union iucv_param)); 405 memset(parm, 0, sizeof(union iucv_param));
406 parm->db.ipbfadr1 = virt_to_phys(percpu_ptr(iucv_irq_data, cpu)); 406 parm->db.ipbfadr1 = virt_to_phys(iucv_irq_data[cpu]);
407 rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); 407 rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm);
408 if (rc) { 408 if (rc) {
409 char *err = "Unknown"; 409 char *err = "Unknown";
@@ -458,7 +458,7 @@ static void iucv_retrieve_cpu(void *data)
458 iucv_block_cpu(NULL); 458 iucv_block_cpu(NULL);
459 459
460 /* Retrieve interrupt buffer. */ 460 /* Retrieve interrupt buffer. */
461 parm = percpu_ptr(iucv_param, cpu); 461 parm = iucv_param[cpu];
462 iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); 462 iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm);
463 463
464 /* Clear indication that an iucv buffer exists for this cpu. */ 464 /* Clear indication that an iucv buffer exists for this cpu. */
@@ -558,22 +558,23 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
558 switch (action) { 558 switch (action) {
559 case CPU_UP_PREPARE: 559 case CPU_UP_PREPARE:
560 case CPU_UP_PREPARE_FROZEN: 560 case CPU_UP_PREPARE_FROZEN:
561 if (!percpu_populate(iucv_irq_data, 561 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
562 sizeof(struct iucv_irq_data), 562 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
563 GFP_KERNEL|GFP_DMA, cpu)) 563 if (!iucv_irq_data[cpu])
564 return NOTIFY_BAD; 564 return NOTIFY_BAD;
565 if (!percpu_populate(iucv_param, sizeof(union iucv_param), 565 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
566 GFP_KERNEL|GFP_DMA, cpu)) { 566 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
567 percpu_depopulate(iucv_irq_data, cpu); 567 if (!iucv_param[cpu])
568 return NOTIFY_BAD; 568 return NOTIFY_BAD;
569 }
570 break; 569 break;
571 case CPU_UP_CANCELED: 570 case CPU_UP_CANCELED:
572 case CPU_UP_CANCELED_FROZEN: 571 case CPU_UP_CANCELED_FROZEN:
573 case CPU_DEAD: 572 case CPU_DEAD:
574 case CPU_DEAD_FROZEN: 573 case CPU_DEAD_FROZEN:
575 percpu_depopulate(iucv_param, cpu); 574 kfree(iucv_param[cpu]);
576 percpu_depopulate(iucv_irq_data, cpu); 575 iucv_param[cpu] = NULL;
576 kfree(iucv_irq_data[cpu]);
577 iucv_irq_data[cpu] = NULL;
577 break; 578 break;
578 case CPU_ONLINE: 579 case CPU_ONLINE:
579 case CPU_ONLINE_FROZEN: 580 case CPU_ONLINE_FROZEN:
@@ -612,7 +613,7 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
612{ 613{
613 union iucv_param *parm; 614 union iucv_param *parm;
614 615
615 parm = percpu_ptr(iucv_param, smp_processor_id()); 616 parm = iucv_param[smp_processor_id()];
616 memset(parm, 0, sizeof(union iucv_param)); 617 memset(parm, 0, sizeof(union iucv_param));
617 if (userdata) 618 if (userdata)
618 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); 619 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
@@ -755,7 +756,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
755 756
756 local_bh_disable(); 757 local_bh_disable();
757 /* Prepare parameter block. */ 758 /* Prepare parameter block. */
758 parm = percpu_ptr(iucv_param, smp_processor_id()); 759 parm = iucv_param[smp_processor_id()];
759 memset(parm, 0, sizeof(union iucv_param)); 760 memset(parm, 0, sizeof(union iucv_param));
760 parm->ctrl.ippathid = path->pathid; 761 parm->ctrl.ippathid = path->pathid;
761 parm->ctrl.ipmsglim = path->msglim; 762 parm->ctrl.ipmsglim = path->msglim;
@@ -799,7 +800,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
799 BUG_ON(in_atomic()); 800 BUG_ON(in_atomic());
800 spin_lock_bh(&iucv_table_lock); 801 spin_lock_bh(&iucv_table_lock);
801 iucv_cleanup_queue(); 802 iucv_cleanup_queue();
802 parm = percpu_ptr(iucv_param, smp_processor_id()); 803 parm = iucv_param[smp_processor_id()];
803 memset(parm, 0, sizeof(union iucv_param)); 804 memset(parm, 0, sizeof(union iucv_param));
804 parm->ctrl.ipmsglim = path->msglim; 805 parm->ctrl.ipmsglim = path->msglim;
805 parm->ctrl.ipflags1 = path->flags; 806 parm->ctrl.ipflags1 = path->flags;
@@ -854,7 +855,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
854 int rc; 855 int rc;
855 856
856 local_bh_disable(); 857 local_bh_disable();
857 parm = percpu_ptr(iucv_param, smp_processor_id()); 858 parm = iucv_param[smp_processor_id()];
858 memset(parm, 0, sizeof(union iucv_param)); 859 memset(parm, 0, sizeof(union iucv_param));
859 if (userdata) 860 if (userdata)
860 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); 861 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
@@ -881,7 +882,7 @@ int iucv_path_resume(struct iucv_path *path, u8 userdata[16])
881 int rc; 882 int rc;
882 883
883 local_bh_disable(); 884 local_bh_disable();
884 parm = percpu_ptr(iucv_param, smp_processor_id()); 885 parm = iucv_param[smp_processor_id()];
885 memset(parm, 0, sizeof(union iucv_param)); 886 memset(parm, 0, sizeof(union iucv_param));
886 if (userdata) 887 if (userdata)
887 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); 888 memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
@@ -936,7 +937,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
936 int rc; 937 int rc;
937 938
938 local_bh_disable(); 939 local_bh_disable();
939 parm = percpu_ptr(iucv_param, smp_processor_id()); 940 parm = iucv_param[smp_processor_id()];
940 memset(parm, 0, sizeof(union iucv_param)); 941 memset(parm, 0, sizeof(union iucv_param));
941 parm->purge.ippathid = path->pathid; 942 parm->purge.ippathid = path->pathid;
942 parm->purge.ipmsgid = msg->id; 943 parm->purge.ipmsgid = msg->id;
@@ -1003,7 +1004,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
1003 } 1004 }
1004 1005
1005 local_bh_disable(); 1006 local_bh_disable();
1006 parm = percpu_ptr(iucv_param, smp_processor_id()); 1007 parm = iucv_param[smp_processor_id()];
1007 memset(parm, 0, sizeof(union iucv_param)); 1008 memset(parm, 0, sizeof(union iucv_param));
1008 parm->db.ipbfadr1 = (u32)(addr_t) buffer; 1009 parm->db.ipbfadr1 = (u32)(addr_t) buffer;
1009 parm->db.ipbfln1f = (u32) size; 1010 parm->db.ipbfln1f = (u32) size;
@@ -1040,7 +1041,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg)
1040 int rc; 1041 int rc;
1041 1042
1042 local_bh_disable(); 1043 local_bh_disable();
1043 parm = percpu_ptr(iucv_param, smp_processor_id()); 1044 parm = iucv_param[smp_processor_id()];
1044 memset(parm, 0, sizeof(union iucv_param)); 1045 memset(parm, 0, sizeof(union iucv_param));
1045 parm->db.ippathid = path->pathid; 1046 parm->db.ippathid = path->pathid;
1046 parm->db.ipmsgid = msg->id; 1047 parm->db.ipmsgid = msg->id;
@@ -1074,7 +1075,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
1074 int rc; 1075 int rc;
1075 1076
1076 local_bh_disable(); 1077 local_bh_disable();
1077 parm = percpu_ptr(iucv_param, smp_processor_id()); 1078 parm = iucv_param[smp_processor_id()];
1078 memset(parm, 0, sizeof(union iucv_param)); 1079 memset(parm, 0, sizeof(union iucv_param));
1079 if (flags & IUCV_IPRMDATA) { 1080 if (flags & IUCV_IPRMDATA) {
1080 parm->dpl.ippathid = path->pathid; 1081 parm->dpl.ippathid = path->pathid;
@@ -1118,7 +1119,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
1118 int rc; 1119 int rc;
1119 1120
1120 local_bh_disable(); 1121 local_bh_disable();
1121 parm = percpu_ptr(iucv_param, smp_processor_id()); 1122 parm = iucv_param[smp_processor_id()];
1122 memset(parm, 0, sizeof(union iucv_param)); 1123 memset(parm, 0, sizeof(union iucv_param));
1123 if (flags & IUCV_IPRMDATA) { 1124 if (flags & IUCV_IPRMDATA) {
1124 /* Message of 8 bytes can be placed into the parameter list. */ 1125 /* Message of 8 bytes can be placed into the parameter list. */
@@ -1172,7 +1173,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
1172 int rc; 1173 int rc;
1173 1174
1174 local_bh_disable(); 1175 local_bh_disable();
1175 parm = percpu_ptr(iucv_param, smp_processor_id()); 1176 parm = iucv_param[smp_processor_id()];
1176 memset(parm, 0, sizeof(union iucv_param)); 1177 memset(parm, 0, sizeof(union iucv_param));
1177 if (flags & IUCV_IPRMDATA) { 1178 if (flags & IUCV_IPRMDATA) {
1178 parm->dpl.ippathid = path->pathid; 1179 parm->dpl.ippathid = path->pathid;
@@ -1491,7 +1492,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
1491 [0x08] = iucv_message_pending, 1492 [0x08] = iucv_message_pending,
1492 [0x09] = iucv_message_pending, 1493 [0x09] = iucv_message_pending,
1493 }; 1494 };
1494 struct list_head task_queue = LIST_HEAD_INIT(task_queue); 1495 LIST_HEAD(task_queue);
1495 struct iucv_irq_list *p, *n; 1496 struct iucv_irq_list *p, *n;
1496 1497
1497 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ 1498 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
@@ -1525,7 +1526,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
1525static void iucv_work_fn(struct work_struct *work) 1526static void iucv_work_fn(struct work_struct *work)
1526{ 1527{
1527 typedef void iucv_irq_fn(struct iucv_irq_data *); 1528 typedef void iucv_irq_fn(struct iucv_irq_data *);
1528 struct list_head work_queue = LIST_HEAD_INIT(work_queue); 1529 LIST_HEAD(work_queue);
1529 struct iucv_irq_list *p, *n; 1530 struct iucv_irq_list *p, *n;
1530 1531
1531 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ 1532 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
@@ -1559,7 +1560,7 @@ static void iucv_external_interrupt(u16 code)
1559 struct iucv_irq_data *p; 1560 struct iucv_irq_data *p;
1560 struct iucv_irq_list *work; 1561 struct iucv_irq_list *work;
1561 1562
1562 p = percpu_ptr(iucv_irq_data, smp_processor_id()); 1563 p = iucv_irq_data[smp_processor_id()];
1563 if (p->ippathid >= iucv_max_pathid) { 1564 if (p->ippathid >= iucv_max_pathid) {
1564 printk(KERN_WARNING "iucv_do_int: Got interrupt with " 1565 printk(KERN_WARNING "iucv_do_int: Got interrupt with "
1565 "pathid %d > max_connections (%ld)\n", 1566 "pathid %d > max_connections (%ld)\n",
@@ -1598,6 +1599,7 @@ static void iucv_external_interrupt(u16 code)
1598static int __init iucv_init(void) 1599static int __init iucv_init(void)
1599{ 1600{
1600 int rc; 1601 int rc;
1602 int cpu;
1601 1603
1602 if (!MACHINE_IS_VM) { 1604 if (!MACHINE_IS_VM) {
1603 rc = -EPROTONOSUPPORT; 1605 rc = -EPROTONOSUPPORT;
@@ -1617,19 +1619,23 @@ static int __init iucv_init(void)
1617 rc = PTR_ERR(iucv_root); 1619 rc = PTR_ERR(iucv_root);
1618 goto out_bus; 1620 goto out_bus;
1619 } 1621 }
1620 /* Note: GFP_DMA used to get memory below 2G */ 1622
1621 iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), 1623 for_each_online_cpu(cpu) {
1622 GFP_KERNEL|GFP_DMA); 1624 /* Note: GFP_DMA used to get memory below 2G */
1623 if (!iucv_irq_data) { 1625 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
1624 rc = -ENOMEM; 1626 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
1625 goto out_root; 1627 if (!iucv_irq_data[cpu]) {
1626 } 1628 rc = -ENOMEM;
1627 /* Allocate parameter blocks. */ 1629 goto out_free;
1628 iucv_param = percpu_alloc(sizeof(union iucv_param), 1630 }
1629 GFP_KERNEL|GFP_DMA); 1631
1630 if (!iucv_param) { 1632 /* Allocate parameter blocks. */
1631 rc = -ENOMEM; 1633 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
1632 goto out_extint; 1634 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
1635 if (!iucv_param[cpu]) {
1636 rc = -ENOMEM;
1637 goto out_free;
1638 }
1633 } 1639 }
1634 register_hotcpu_notifier(&iucv_cpu_notifier); 1640 register_hotcpu_notifier(&iucv_cpu_notifier);
1635 ASCEBC(iucv_error_no_listener, 16); 1641 ASCEBC(iucv_error_no_listener, 16);
@@ -1638,9 +1644,13 @@ static int __init iucv_init(void)
1638 iucv_available = 1; 1644 iucv_available = 1;
1639 return 0; 1645 return 0;
1640 1646
1641out_extint: 1647out_free:
1642 percpu_free(iucv_irq_data); 1648 for_each_possible_cpu(cpu) {
1643out_root: 1649 kfree(iucv_param[cpu]);
1650 iucv_param[cpu] = NULL;
1651 kfree(iucv_irq_data[cpu]);
1652 iucv_irq_data[cpu] = NULL;
1653 }
1644 s390_root_dev_unregister(iucv_root); 1654 s390_root_dev_unregister(iucv_root);
1645out_bus: 1655out_bus:
1646 bus_unregister(&iucv_bus); 1656 bus_unregister(&iucv_bus);
@@ -1658,6 +1668,7 @@ out:
1658static void __exit iucv_exit(void) 1668static void __exit iucv_exit(void)
1659{ 1669{
1660 struct iucv_irq_list *p, *n; 1670 struct iucv_irq_list *p, *n;
1671 int cpu;
1661 1672
1662 spin_lock_irq(&iucv_queue_lock); 1673 spin_lock_irq(&iucv_queue_lock);
1663 list_for_each_entry_safe(p, n, &iucv_task_queue, list) 1674 list_for_each_entry_safe(p, n, &iucv_task_queue, list)
@@ -1666,8 +1677,12 @@ static void __exit iucv_exit(void)
1666 kfree(p); 1677 kfree(p);
1667 spin_unlock_irq(&iucv_queue_lock); 1678 spin_unlock_irq(&iucv_queue_lock);
1668 unregister_hotcpu_notifier(&iucv_cpu_notifier); 1679 unregister_hotcpu_notifier(&iucv_cpu_notifier);
1669 percpu_free(iucv_param); 1680 for_each_possible_cpu(cpu) {
1670 percpu_free(iucv_irq_data); 1681 kfree(iucv_param[cpu]);
1682 iucv_param[cpu] = NULL;
1683 kfree(iucv_irq_data[cpu]);
1684 iucv_irq_data[cpu] = NULL;
1685 }
1671 s390_root_dev_unregister(iucv_root); 1686 s390_root_dev_unregister(iucv_root);
1672 bus_unregister(&iucv_bus); 1687 bus_unregister(&iucv_bus);
1673 unregister_external_interrupt(0x4000, iucv_external_interrupt); 1688 unregister_external_interrupt(0x4000, iucv_external_interrupt);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7969f8a716df..45c3c27d279a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -152,7 +152,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol)
152 return -EPROTONOSUPPORT; 152 return -EPROTONOSUPPORT;
153 153
154 err = -ENOMEM; 154 err = -ENOMEM;
155 sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); 155 sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto);
156 if (sk == NULL) 156 if (sk == NULL)
157 goto out; 157 goto out;
158 158
@@ -395,9 +395,9 @@ static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx)
395static inline int verify_sec_ctx_len(void *p) 395static inline int verify_sec_ctx_len(void *p)
396{ 396{
397 struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; 397 struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p;
398 int len; 398 int len = sec_ctx->sadb_x_ctx_len;
399 399
400 if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE) 400 if (len > PAGE_SIZE)
401 return -EINVAL; 401 return -EINVAL;
402 402
403 len = pfkey_sec_ctx_len(sec_ctx); 403 len = pfkey_sec_ctx_len(sec_ctx);
@@ -1015,9 +1015,7 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x)
1015{ 1015{
1016 struct sk_buff *skb; 1016 struct sk_buff *skb;
1017 1017
1018 spin_lock_bh(&x->lock);
1019 skb = __pfkey_xfrm_state2msg(x, 1, 3); 1018 skb = __pfkey_xfrm_state2msg(x, 1, 3);
1020 spin_unlock_bh(&x->lock);
1021 1019
1022 return skb; 1020 return skb;
1023} 1021}
@@ -1468,7 +1466,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1468 err = xfrm_state_update(x); 1466 err = xfrm_state_update(x);
1469 1467
1470 xfrm_audit_state_add(x, err ? 0 : 1, 1468 xfrm_audit_state_add(x, err ? 0 : 1,
1471 audit_get_loginuid(current->audit_context), 0); 1469 audit_get_loginuid(current), 0);
1472 1470
1473 if (err < 0) { 1471 if (err < 0) {
1474 x->km.state = XFRM_STATE_DEAD; 1472 x->km.state = XFRM_STATE_DEAD;
@@ -1522,7 +1520,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1522 km_state_notify(x, &c); 1520 km_state_notify(x, &c);
1523out: 1521out:
1524 xfrm_audit_state_delete(x, err ? 0 : 1, 1522 xfrm_audit_state_delete(x, err ? 0 : 1,
1525 audit_get_loginuid(current->audit_context), 0); 1523 audit_get_loginuid(current), 0);
1526 xfrm_state_put(x); 1524 xfrm_state_put(x);
1527 1525
1528 return err; 1526 return err;
@@ -1552,7 +1550,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1552 1550
1553 out_hdr = (struct sadb_msg *) out_skb->data; 1551 out_hdr = (struct sadb_msg *) out_skb->data;
1554 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 1552 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
1555 out_hdr->sadb_msg_type = SADB_DUMP; 1553 out_hdr->sadb_msg_type = SADB_GET;
1556 out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); 1554 out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
1557 out_hdr->sadb_msg_errno = 0; 1555 out_hdr->sadb_msg_errno = 0;
1558 out_hdr->sadb_msg_reserved = 0; 1556 out_hdr->sadb_msg_reserved = 0;
@@ -1697,7 +1695,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
1697 if (proto == 0) 1695 if (proto == 0)
1698 return -EINVAL; 1696 return -EINVAL;
1699 1697
1700 audit_info.loginuid = audit_get_loginuid(current->audit_context); 1698 audit_info.loginuid = audit_get_loginuid(current);
1701 audit_info.secid = 0; 1699 audit_info.secid = 0;
1702 err = xfrm_state_flush(proto, &audit_info); 1700 err = xfrm_state_flush(proto, &audit_info);
1703 if (err) 1701 if (err)
@@ -2275,7 +2273,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2275 hdr->sadb_msg_type != SADB_X_SPDUPDATE); 2273 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
2276 2274
2277 xfrm_audit_policy_add(xp, err ? 0 : 1, 2275 xfrm_audit_policy_add(xp, err ? 0 : 1,
2278 audit_get_loginuid(current->audit_context), 0); 2276 audit_get_loginuid(current), 0);
2279 2277
2280 if (err) 2278 if (err)
2281 goto out; 2279 goto out;
@@ -2293,8 +2291,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2293 return 0; 2291 return 0;
2294 2292
2295out: 2293out:
2296 security_xfrm_policy_free(xp); 2294 xfrm_policy_destroy(xp);
2297 kfree(xp);
2298 return err; 2295 return err;
2299} 2296}
2300 2297
@@ -2359,7 +2356,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2359 return -ENOENT; 2356 return -ENOENT;
2360 2357
2361 xfrm_audit_policy_delete(xp, err ? 0 : 1, 2358 xfrm_audit_policy_delete(xp, err ? 0 : 1,
2362 audit_get_loginuid(current->audit_context), 0); 2359 audit_get_loginuid(current), 0);
2363 2360
2364 if (err) 2361 if (err)
2365 goto out; 2362 goto out;
@@ -2620,7 +2617,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2620 2617
2621 if (delete) { 2618 if (delete) {
2622 xfrm_audit_policy_delete(xp, err ? 0 : 1, 2619 xfrm_audit_policy_delete(xp, err ? 0 : 1,
2623 audit_get_loginuid(current->audit_context), 0); 2620 audit_get_loginuid(current), 0);
2624 2621
2625 if (err) 2622 if (err)
2626 goto out; 2623 goto out;
@@ -2697,7 +2694,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2697 struct xfrm_audit audit_info; 2694 struct xfrm_audit audit_info;
2698 int err; 2695 int err;
2699 2696
2700 audit_info.loginuid = audit_get_loginuid(current->audit_context); 2697 audit_info.loginuid = audit_get_loginuid(current);
2701 audit_info.secid = 0; 2698 audit_info.secid = 0;
2702 err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); 2699 err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
2703 if (err) 2700 if (err)
@@ -2786,12 +2783,22 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
2786 2783
2787static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2784static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2788{ 2785{
2789 return t->aalgos & (1 << d->desc.sadb_alg_id); 2786 unsigned int id = d->desc.sadb_alg_id;
2787
2788 if (id >= sizeof(t->aalgos) * 8)
2789 return 0;
2790
2791 return (t->aalgos >> id) & 1;
2790} 2792}
2791 2793
2792static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2794static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2793{ 2795{
2794 return t->ealgos & (1 << d->desc.sadb_alg_id); 2796 unsigned int id = d->desc.sadb_alg_id;
2797
2798 if (id >= sizeof(t->ealgos) * 8)
2799 return 0;
2800
2801 return (t->ealgos >> id) & 1;
2795} 2802}
2796 2803
2797static int count_ah_combs(struct xfrm_tmpl *t) 2804static int count_ah_combs(struct xfrm_tmpl *t)
@@ -3228,8 +3235,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
3228 return xp; 3235 return xp;
3229 3236
3230out: 3237out:
3231 security_xfrm_policy_free(xp); 3238 xfrm_policy_destroy(xp);
3232 kfree(xp);
3233 return NULL; 3239 return NULL;
3234} 3240}
3235 3241
@@ -3585,27 +3591,29 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
3585 /* old ipsecrequest */ 3591 /* old ipsecrequest */
3586 int mode = pfkey_mode_from_xfrm(mp->mode); 3592 int mode = pfkey_mode_from_xfrm(mp->mode);
3587 if (mode < 0) 3593 if (mode < 0)
3588 return -EINVAL; 3594 goto err;
3589 if (set_ipsecrequest(skb, mp->proto, mode, 3595 if (set_ipsecrequest(skb, mp->proto, mode,
3590 (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), 3596 (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
3591 mp->reqid, mp->old_family, 3597 mp->reqid, mp->old_family,
3592 &mp->old_saddr, &mp->old_daddr) < 0) { 3598 &mp->old_saddr, &mp->old_daddr) < 0)
3593 return -EINVAL; 3599 goto err;
3594 }
3595 3600
3596 /* new ipsecrequest */ 3601 /* new ipsecrequest */
3597 if (set_ipsecrequest(skb, mp->proto, mode, 3602 if (set_ipsecrequest(skb, mp->proto, mode,
3598 (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), 3603 (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
3599 mp->reqid, mp->new_family, 3604 mp->reqid, mp->new_family,
3600 &mp->new_saddr, &mp->new_daddr) < 0) { 3605 &mp->new_saddr, &mp->new_daddr) < 0)
3601 return -EINVAL; 3606 goto err;
3602 }
3603 } 3607 }
3604 3608
3605 /* broadcast migrate message to sockets */ 3609 /* broadcast migrate message to sockets */
3606 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); 3610 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
3607 3611
3608 return 0; 3612 return 0;
3613
3614err:
3615 kfree_skb(skb);
3616 return -EINVAL;
3609} 3617}
3610#else 3618#else
3611static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 3619static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index a2e7aa63fd8a..2ba1bc4f3c3a 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -39,7 +39,7 @@
39#include <linux/init.h> 39#include <linux/init.h>
40#include <net/lapb.h> 40#include <net/lapb.h>
41 41
42static struct list_head lapb_list = LIST_HEAD_INIT(lapb_list); 42static LIST_HEAD(lapb_list);
43static DEFINE_RWLOCK(lapb_list_lock); 43static DEFINE_RWLOCK(lapb_list_lock);
44 44
45/* 45/*
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 8ebc2769dfda..441bc18f996d 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -831,25 +831,21 @@ static void llc_sk_init(struct sock* sk)
831 llc->inc_cntr = llc->dec_cntr = 2; 831 llc->inc_cntr = llc->dec_cntr = 2;
832 llc->dec_step = llc->connect_step = 1; 832 llc->dec_step = llc->connect_step = 1;
833 833
834 init_timer(&llc->ack_timer.timer); 834 setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
835 (unsigned long)sk);
835 llc->ack_timer.expire = sysctl_llc2_ack_timeout; 836 llc->ack_timer.expire = sysctl_llc2_ack_timeout;
836 llc->ack_timer.timer.data = (unsigned long)sk;
837 llc->ack_timer.timer.function = llc_conn_ack_tmr_cb;
838 837
839 init_timer(&llc->pf_cycle_timer.timer); 838 setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
839 (unsigned long)sk);
840 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; 840 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
841 llc->pf_cycle_timer.timer.data = (unsigned long)sk;
842 llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb;
843 841
844 init_timer(&llc->rej_sent_timer.timer); 842 setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
843 (unsigned long)sk);
845 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; 844 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
846 llc->rej_sent_timer.timer.data = (unsigned long)sk;
847 llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb;
848 845
849 init_timer(&llc->busy_state_timer.timer); 846 setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
847 (unsigned long)sk);
850 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; 848 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
851 llc->busy_state_timer.timer.data = (unsigned long)sk;
852 llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb;
853 849
854 llc->n2 = 2; /* max retransmit */ 850 llc->n2 = 2; /* max retransmit */
855 llc->k = 2; /* tx win size, will adjust dynam */ 851 llc->k = 2; /* tx win size, will adjust dynam */
@@ -869,7 +865,7 @@ static void llc_sk_init(struct sock* sk)
869 */ 865 */
870struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) 866struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
871{ 867{
872 struct sock *sk = sk_alloc(net, family, priority, prot, 1); 868 struct sock *sk = sk_alloc(net, family, priority, prot);
873 869
874 if (!sk) 870 if (!sk)
875 goto out; 871 goto out;
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 576355a192ab..6f2ea2090322 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -688,9 +688,8 @@ int __init llc_station_init(void)
688 skb_queue_head_init(&llc_main_station.mac_pdu_q); 688 skb_queue_head_init(&llc_main_station.mac_pdu_q);
689 skb_queue_head_init(&llc_main_station.ev_q.list); 689 skb_queue_head_init(&llc_main_station.ev_q.list);
690 spin_lock_init(&llc_main_station.ev_q.lock); 690 spin_lock_init(&llc_main_station.ev_q.lock);
691 init_timer(&llc_main_station.ack_timer); 691 setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb,
692 llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; 692 (unsigned long)&llc_main_station);
693 llc_main_station.ack_timer.function = llc_station_ack_tmr_cb;
694 llc_main_station.ack_timer.expires = jiffies + 693 llc_main_station.ack_timer.expires = jiffies +
695 sysctl_llc_station_ack_timeout; 694 sysctl_llc_station_ack_timeout;
696 skb = alloc_skb(0, GFP_ATOMIC); 695 skb = alloc_skb(0, GFP_ATOMIC);
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 46992d036017..5bef1dcf18e3 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -92,31 +92,17 @@ static struct ctl_table llc_table[] = {
92 { 0 }, 92 { 0 },
93}; 93};
94 94
95static struct ctl_table llc_dir_table[] = { 95static struct ctl_path llc_path[] = {
96 { 96 { .procname = "net", .ctl_name = CTL_NET, },
97 .ctl_name = NET_LLC, 97 { .procname = "llc", .ctl_name = NET_LLC, },
98 .procname = "llc", 98 { }
99 .mode = 0555,
100 .child = llc_table,
101 },
102 { 0 },
103};
104
105static struct ctl_table llc_root_table[] = {
106 {
107 .ctl_name = CTL_NET,
108 .procname = "net",
109 .mode = 0555,
110 .child = llc_dir_table,
111 },
112 { 0 },
113}; 99};
114 100
115static struct ctl_table_header *llc_table_header; 101static struct ctl_table_header *llc_table_header;
116 102
117int __init llc_sysctl_init(void) 103int __init llc_sysctl_init(void)
118{ 104{
119 llc_table_header = register_sysctl_table(llc_root_table); 105 llc_table_header = register_sysctl_paths(llc_path, llc_table);
120 106
121 return llc_table_header ? 0 : -ENOMEM; 107 return llc_table_header ? 0 : -ENOMEM;
122} 108}
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 6fffb3845ab6..09c255002e56 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -10,15 +10,84 @@ config MAC80211
10 select CFG80211 10 select CFG80211
11 select NET_SCH_FIFO 11 select NET_SCH_FIFO
12 ---help--- 12 ---help---
13 This option enables the hardware independent IEEE 802.11 13 This option enables the hardware independent IEEE 802.11
14 networking stack. 14 networking stack.
15
16menu "Rate control algorithm selection"
17 depends on MAC80211 != n
18
19choice
20 prompt "Default rate control algorithm"
21 default MAC80211_RC_DEFAULT_PID
22 ---help---
23 This option selects the default rate control algorithm
24 mac80211 will use. Note that this default can still be
25 overriden through the ieee80211_default_rc_algo module
26 parameter if different algorithms are available.
27
28config MAC80211_RC_DEFAULT_PID
29 bool "PID controller based rate control algorithm"
30 select MAC80211_RC_PID
31 ---help---
32 Select the PID controller based rate control as the
33 default rate control algorithm. You should choose
34 this unless you know what you are doing.
35
36config MAC80211_RC_DEFAULT_SIMPLE
37 bool "Simple rate control algorithm"
38 select MAC80211_RC_SIMPLE
39 ---help---
40 Select the simple rate control as the default rate
41 control algorithm. Note that this is a non-responsive,
42 dumb algorithm. You should choose the PID rate control
43 instead.
44
45config MAC80211_RC_DEFAULT_NONE
46 bool "No default algorithm"
47 depends on EMBEDDED
48 help
49 Selecting this option will select no default algorithm
50 and allow you to not build any. Do not choose this
51 option unless you know your driver comes with another
52 suitable algorithm.
53endchoice
54
55comment "Selecting 'y' for an algorithm will"
56comment "build the algorithm into mac80211."
57
58config MAC80211_RC_DEFAULT
59 string
60 default "pid" if MAC80211_RC_DEFAULT_PID
61 default "simple" if MAC80211_RC_DEFAULT_SIMPLE
62 default ""
63
64config MAC80211_RC_PID
65 tristate "PID controller based rate control algorithm"
66 ---help---
67 This option enables a TX rate control algorithm for
68 mac80211 that uses a PID controller to select the TX
69 rate.
70
71 Say Y or M unless you're sure you want to use a
72 different rate control algorithm.
73
74config MAC80211_RC_SIMPLE
75 tristate "Simple rate control algorithm (DEPRECATED)"
76 ---help---
77 This option enables a very simple, non-responsive TX
78 rate control algorithm. This algorithm is deprecated
79 and will be removed from the kernel in the near future.
80 It has been replaced by the PID algorithm.
81
82 Say N unless you know what you are doing.
83endmenu
15 84
16config MAC80211_LEDS 85config MAC80211_LEDS
17 bool "Enable LED triggers" 86 bool "Enable LED triggers"
18 depends on MAC80211 && LEDS_TRIGGERS 87 depends on MAC80211 && LEDS_TRIGGERS
19 ---help--- 88 ---help---
20 This option enables a few LED triggers for different 89 This option enables a few LED triggers for different
21 packet receive/transmit events. 90 packet receive/transmit events.
22 91
23config MAC80211_DEBUGFS 92config MAC80211_DEBUGFS
24 bool "Export mac80211 internals in DebugFS" 93 bool "Export mac80211 internals in DebugFS"
@@ -39,6 +108,16 @@ config MAC80211_DEBUG
39 If you are not trying to debug or develop the ieee80211 108 If you are not trying to debug or develop the ieee80211
40 subsystem, you most likely want to say N here. 109 subsystem, you most likely want to say N here.
41 110
111config MAC80211_HT_DEBUG
112 bool "Enable HT debugging output"
113 depends on MAC80211_DEBUG
114 ---help---
115 This option enables 802.11n High Throughput features
116 debug tracing output.
117
118 If you are not trying to debug of develop the ieee80211
119 subsystem, you most likely want to say N here.
120
42config MAC80211_VERBOSE_DEBUG 121config MAC80211_VERBOSE_DEBUG
43 bool "Verbose debugging output" 122 bool "Verbose debugging output"
44 depends on MAC80211_DEBUG 123 depends on MAC80211_DEBUG
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 219cd9f9341f..54f46bc80cfe 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,10 +1,15 @@
1obj-$(CONFIG_MAC80211) += mac80211.o rc80211_simple.o 1obj-$(CONFIG_MAC80211) += mac80211.o
2 2
3mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o 3# objects for PID algorithm
4mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o 4rc80211_pid-y := rc80211_pid_algo.o
5mac80211-objs-$(CONFIG_NET_SCHED) += wme.o 5rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
6 6
7mac80211-objs := \ 7# build helper for PID algorithm
8rc-pid-y := $(rc80211_pid-y)
9rc-pid-m := rc80211_pid.o
10
11# mac80211 objects
12mac80211-y := \
8 ieee80211.o \ 13 ieee80211.o \
9 ieee80211_ioctl.o \ 14 ieee80211_ioctl.o \
10 sta_info.o \ 15 sta_info.o \
@@ -22,5 +27,22 @@ mac80211-objs := \
22 tx.o \ 27 tx.o \
23 key.o \ 28 key.o \
24 util.o \ 29 util.o \
25 event.o \ 30 event.o
26 $(mac80211-objs-y) 31
32mac80211-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
33mac80211-$(CONFIG_NET_SCHED) += wme.o
34mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
35 debugfs.o \
36 debugfs_sta.o \
37 debugfs_netdev.o \
38 debugfs_key.o
39
40
41# Build rate control algorithm(s)
42CFLAGS_rc80211_simple.o += -DRC80211_SIMPLE_COMPILE
43CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE
44mac80211-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
45mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID))
46
47# Modular rate algorithms are assigned to mac80211-m - make separate modules
48obj-m += $(mac80211-m)
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index bf7ba128b963..e62fe55944b8 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -11,7 +11,6 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/crypto.h> 12#include <linux/crypto.h>
13#include <linux/err.h> 13#include <linux/err.h>
14#include <asm/scatterlist.h>
15 14
16#include <net/mac80211.h> 15#include <net/mac80211.h>
17#include "ieee80211_key.h" 16#include "ieee80211_key.h"
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9e2bc1fd0237..22c9619ba776 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1,17 +1,20 @@
1/* 1/*
2 * mac80211 configuration hooks for cfg80211 2 * mac80211 configuration hooks for cfg80211
3 * 3 *
4 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
5 * 5 *
6 * This file is GPLv2 as found in COPYING. 6 * This file is GPLv2 as found in COPYING.
7 */ 7 */
8 8
9#include <linux/ieee80211.h>
9#include <linux/nl80211.h> 10#include <linux/nl80211.h>
10#include <linux/rtnetlink.h> 11#include <linux/rtnetlink.h>
11#include <net/net_namespace.h> 12#include <net/net_namespace.h>
13#include <linux/rcupdate.h>
12#include <net/cfg80211.h> 14#include <net/cfg80211.h>
13#include "ieee80211_i.h" 15#include "ieee80211_i.h"
14#include "cfg.h" 16#include "cfg.h"
17#include "ieee80211_rate.h"
15 18
16static enum ieee80211_if_types 19static enum ieee80211_if_types
17nl80211_type_to_mac80211_type(enum nl80211_iftype type) 20nl80211_type_to_mac80211_type(enum nl80211_iftype type)
@@ -90,7 +93,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
90 93
91 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 94 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
92 95
93 if (sdata->type == IEEE80211_IF_TYPE_VLAN) 96 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
94 return -EOPNOTSUPP; 97 return -EOPNOTSUPP;
95 98
96 ieee80211_if_reinit(dev); 99 ieee80211_if_reinit(dev);
@@ -99,8 +102,553 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
99 return 0; 102 return 0;
100} 103}
101 104
105static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
106 u8 key_idx, u8 *mac_addr,
107 struct key_params *params)
108{
109 struct ieee80211_sub_if_data *sdata;
110 struct sta_info *sta = NULL;
111 enum ieee80211_key_alg alg;
112 int ret;
113
114 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
115
116 switch (params->cipher) {
117 case WLAN_CIPHER_SUITE_WEP40:
118 case WLAN_CIPHER_SUITE_WEP104:
119 alg = ALG_WEP;
120 break;
121 case WLAN_CIPHER_SUITE_TKIP:
122 alg = ALG_TKIP;
123 break;
124 case WLAN_CIPHER_SUITE_CCMP:
125 alg = ALG_CCMP;
126 break;
127 default:
128 return -EINVAL;
129 }
130
131 if (mac_addr) {
132 sta = sta_info_get(sdata->local, mac_addr);
133 if (!sta)
134 return -ENOENT;
135 }
136
137 ret = 0;
138 if (!ieee80211_key_alloc(sdata, sta, alg, key_idx,
139 params->key_len, params->key))
140 ret = -ENOMEM;
141
142 if (sta)
143 sta_info_put(sta);
144
145 return ret;
146}
147
148static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
149 u8 key_idx, u8 *mac_addr)
150{
151 struct ieee80211_sub_if_data *sdata;
152 struct sta_info *sta;
153 int ret;
154
155 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
156
157 if (mac_addr) {
158 sta = sta_info_get(sdata->local, mac_addr);
159 if (!sta)
160 return -ENOENT;
161
162 ret = 0;
163 if (sta->key)
164 ieee80211_key_free(sta->key);
165 else
166 ret = -ENOENT;
167
168 sta_info_put(sta);
169 return ret;
170 }
171
172 if (!sdata->keys[key_idx])
173 return -ENOENT;
174
175 ieee80211_key_free(sdata->keys[key_idx]);
176
177 return 0;
178}
179
180static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
181 u8 key_idx, u8 *mac_addr, void *cookie,
182 void (*callback)(void *cookie,
183 struct key_params *params))
184{
185 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
186 struct sta_info *sta = NULL;
187 u8 seq[6] = {0};
188 struct key_params params;
189 struct ieee80211_key *key;
190 u32 iv32;
191 u16 iv16;
192 int err = -ENOENT;
193
194 if (mac_addr) {
195 sta = sta_info_get(sdata->local, mac_addr);
196 if (!sta)
197 goto out;
198
199 key = sta->key;
200 } else
201 key = sdata->keys[key_idx];
202
203 if (!key)
204 goto out;
205
206 memset(&params, 0, sizeof(params));
207
208 switch (key->conf.alg) {
209 case ALG_TKIP:
210 params.cipher = WLAN_CIPHER_SUITE_TKIP;
211
212 iv32 = key->u.tkip.iv32;
213 iv16 = key->u.tkip.iv16;
214
215 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
216 sdata->local->ops->get_tkip_seq)
217 sdata->local->ops->get_tkip_seq(
218 local_to_hw(sdata->local),
219 key->conf.hw_key_idx,
220 &iv32, &iv16);
221
222 seq[0] = iv16 & 0xff;
223 seq[1] = (iv16 >> 8) & 0xff;
224 seq[2] = iv32 & 0xff;
225 seq[3] = (iv32 >> 8) & 0xff;
226 seq[4] = (iv32 >> 16) & 0xff;
227 seq[5] = (iv32 >> 24) & 0xff;
228 params.seq = seq;
229 params.seq_len = 6;
230 break;
231 case ALG_CCMP:
232 params.cipher = WLAN_CIPHER_SUITE_CCMP;
233 seq[0] = key->u.ccmp.tx_pn[5];
234 seq[1] = key->u.ccmp.tx_pn[4];
235 seq[2] = key->u.ccmp.tx_pn[3];
236 seq[3] = key->u.ccmp.tx_pn[2];
237 seq[4] = key->u.ccmp.tx_pn[1];
238 seq[5] = key->u.ccmp.tx_pn[0];
239 params.seq = seq;
240 params.seq_len = 6;
241 break;
242 case ALG_WEP:
243 if (key->conf.keylen == 5)
244 params.cipher = WLAN_CIPHER_SUITE_WEP40;
245 else
246 params.cipher = WLAN_CIPHER_SUITE_WEP104;
247 break;
248 }
249
250 params.key = key->conf.key;
251 params.key_len = key->conf.keylen;
252
253 callback(cookie, &params);
254 err = 0;
255
256 out:
257 if (sta)
258 sta_info_put(sta);
259 return err;
260}
261
262static int ieee80211_config_default_key(struct wiphy *wiphy,
263 struct net_device *dev,
264 u8 key_idx)
265{
266 struct ieee80211_sub_if_data *sdata;
267
268 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
269 ieee80211_set_default_key(sdata, key_idx);
270
271 return 0;
272}
273
274static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
275 u8 *mac, struct station_stats *stats)
276{
277 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
278 struct sta_info *sta;
279
280 sta = sta_info_get(local, mac);
281 if (!sta)
282 return -ENOENT;
283
284 /* XXX: verify sta->dev == dev */
285
286 stats->filled = STATION_STAT_INACTIVE_TIME |
287 STATION_STAT_RX_BYTES |
288 STATION_STAT_TX_BYTES;
289
290 stats->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
291 stats->rx_bytes = sta->rx_bytes;
292 stats->tx_bytes = sta->tx_bytes;
293
294 sta_info_put(sta);
295
296 return 0;
297}
298
/*
 * This handles both adding a beacon and setting new beacon info.
 *
 * Builds a single allocation holding the beacon_data header followed by
 * the head and tail blobs, then publishes it via RCU so readers on the
 * TX path never see a half-updated beacon. Fields absent from @params
 * are inherited from the previous beacon (if any).
 *
 * Returns 0 on success (or if only the beacon interval was updated),
 * -EINVAL on bad parameters, -ENOMEM on allocation failure, or the
 * result of ieee80211_if_config_beacon().
 */
static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
				   struct beacon_parameters *params)
{
	struct beacon_data *new, *old;
	int new_head_len, new_tail_len;
	int size;
	int err = -EINVAL;

	old = sdata->u.ap.beacon;

	/* head must not be zero-length */
	if (params->head && !params->head_len)
		return -EINVAL;

	/*
	 * This is a kludge. beacon interval should really be part
	 * of the beacon information.
	 */
	if (params->interval) {
		sdata->local->hw.conf.beacon_int = params->interval;
		if (ieee80211_hw_config(sdata->local))
			return -EINVAL;
		/*
		 * We updated some parameter so if below bails out
		 * it's not an error.
		 */
		err = 0;
	}

	/* Need to have a beacon head if we don't have one yet */
	if (!params->head && !old)
		return err;

	/* sorry, no way to start beaconing without dtim period */
	if (!params->dtim_period && !old)
		return err;

	/* new or old head? (old is non-NULL here when params->head is NULL) */
	if (params->head)
		new_head_len = params->head_len;
	else
		new_head_len = old->head_len;

	/* new or old tail? */
	if (params->tail || !old)
		/* params->tail_len will be zero for !params->tail */
		new_tail_len = params->tail_len;
	else
		new_tail_len = old->tail_len;

	size = sizeof(*new) + new_head_len + new_tail_len;

	new = kzalloc(size, GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	/* start filling the new info now */

	/* new or old dtim period? (old non-NULL here when params' is zero) */
	if (params->dtim_period)
		new->dtim_period = params->dtim_period;
	else
		new->dtim_period = old->dtim_period;

	/*
	 * pointers go into the block we allocated,
	 * memory is | beacon_data | head | tail |
	 */
	new->head = ((u8 *) new) + sizeof(*new);
	new->tail = new->head + new_head_len;
	new->head_len = new_head_len;
	new->tail_len = new_tail_len;

	/* copy in head */
	if (params->head)
		memcpy(new->head, params->head, new_head_len);
	else
		memcpy(new->head, old->head, new_head_len);

	/* copy in optional tail (new_tail_len is 0 when neither exists) */
	if (params->tail)
		memcpy(new->tail, params->tail, new_tail_len);
	else
		if (old)
			memcpy(new->tail, old->tail, new_tail_len);

	/* publish atomically, then wait for readers before freeing old */
	rcu_assign_pointer(sdata->u.ap.beacon, new);

	synchronize_rcu();

	kfree(old);

	return ieee80211_if_config_beacon(sdata->dev);
}
396
397static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
398 struct beacon_parameters *params)
399{
400 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
401 struct beacon_data *old;
402
403 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
404 return -EINVAL;
405
406 old = sdata->u.ap.beacon;
407
408 if (old)
409 return -EALREADY;
410
411 return ieee80211_config_beacon(sdata, params);
412}
413
414static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
415 struct beacon_parameters *params)
416{
417 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
418 struct beacon_data *old;
419
420 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
421 return -EINVAL;
422
423 old = sdata->u.ap.beacon;
424
425 if (!old)
426 return -ENOENT;
427
428 return ieee80211_config_beacon(sdata, params);
429}
430
431static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
432{
433 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
434 struct beacon_data *old;
435
436 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
437 return -EINVAL;
438
439 old = sdata->u.ap.beacon;
440
441 if (!old)
442 return -ENOENT;
443
444 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
445 synchronize_rcu();
446 kfree(old);
447
448 return ieee80211_if_config_beacon(dev);
449}
450
/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
/*
 * On-the-wire layout — must stay packed and byte-exact. Sent by
 * ieee80211_send_layer2_update() so LAN bridges learn a station's
 * new location when it (re)associates.
 */
struct iapp_layer2_update {
	u8 da[ETH_ALEN];	/* broadcast */
	u8 sa[ETH_ALEN];	/* STA addr */
	__be16 len;		/* 6 */
	u8 dsap;		/* 0 */
	u8 ssap;		/* 0 */
	u8 control;
	u8 xid_info[3];
} __attribute__ ((packed));
461
/*
 * Build and inject a Layer 2 Update frame sourced from @sta's address.
 * The frame is fed into the local RX path (netif_rx) so that any
 * bridge ports learn the station's MAC behind this interface.
 * Best effort: on skb allocation failure the update is silently dropped.
 */
static void ieee80211_send_layer2_update(struct sta_info *sta)
{
	struct iapp_layer2_update *msg;
	struct sk_buff *skb;

	/* Send Level 2 Update Frame to update forwarding tables in layer 2
	 * bridge devices */

	skb = dev_alloc_skb(sizeof(*msg));
	if (!skb)
		return;
	msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));

	/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */

	memset(msg->da, 0xff, ETH_ALEN);	/* broadcast destination */
	memcpy(msg->sa, sta->addr, ETH_ALEN);	/* frame "comes from" the STA */
	msg->len = htons(6);
	msg->dsap = 0;
	msg->ssap = 0x01;	/* NULL LSAP, CR Bit: Response */
	msg->control = 0xaf;	/* XID response lsb.1111F101.
				 * F=0 (no poll command; unsolicited frame) */
	msg->xid_info[0] = 0x81;	/* XID format identifier */
	msg->xid_info[1] = 1;	/* LLC types/classes: Type 1 LLC */
	msg->xid_info[2] = 0;	/* XID sender's receive window size (RW) */

	/* hand the frame to the stack as if received on the STA's netdev */
	skb->dev = sta->dev;
	skb->protocol = eth_type_trans(skb, sta->dev);
	memset(skb->cb, 0, sizeof(skb->cb));
	netif_rx(skb);
}
494
495static void sta_apply_parameters(struct ieee80211_local *local,
496 struct sta_info *sta,
497 struct station_parameters *params)
498{
499 u32 rates;
500 int i, j;
501 struct ieee80211_hw_mode *mode;
502
503 if (params->station_flags & STATION_FLAG_CHANGED) {
504 sta->flags &= ~WLAN_STA_AUTHORIZED;
505 if (params->station_flags & STATION_FLAG_AUTHORIZED)
506 sta->flags |= WLAN_STA_AUTHORIZED;
507
508 sta->flags &= ~WLAN_STA_SHORT_PREAMBLE;
509 if (params->station_flags & STATION_FLAG_SHORT_PREAMBLE)
510 sta->flags |= WLAN_STA_SHORT_PREAMBLE;
511
512 sta->flags &= ~WLAN_STA_WME;
513 if (params->station_flags & STATION_FLAG_WME)
514 sta->flags |= WLAN_STA_WME;
515 }
516
517 if (params->aid) {
518 sta->aid = params->aid;
519 if (sta->aid > IEEE80211_MAX_AID)
520 sta->aid = 0; /* XXX: should this be an error? */
521 }
522
523 if (params->listen_interval >= 0)
524 sta->listen_interval = params->listen_interval;
525
526 if (params->supported_rates) {
527 rates = 0;
528 mode = local->oper_hw_mode;
529 for (i = 0; i < params->supported_rates_len; i++) {
530 int rate = (params->supported_rates[i] & 0x7f) * 5;
531 for (j = 0; j < mode->num_rates; j++) {
532 if (mode->rates[j].rate == rate)
533 rates |= BIT(j);
534 }
535 }
536 sta->supp_rates = rates;
537 }
538}
539
540static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
541 u8 *mac, struct station_parameters *params)
542{
543 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
544 struct sta_info *sta;
545 struct ieee80211_sub_if_data *sdata;
546
547 /* Prevent a race with changing the rate control algorithm */
548 if (!netif_running(dev))
549 return -ENETDOWN;
550
551 /* XXX: get sta belonging to dev */
552 sta = sta_info_get(local, mac);
553 if (sta) {
554 sta_info_put(sta);
555 return -EEXIST;
556 }
557
558 if (params->vlan) {
559 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
560
561 if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
562 sdata->vif.type != IEEE80211_IF_TYPE_AP)
563 return -EINVAL;
564 } else
565 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
566
567 sta = sta_info_add(local, dev, mac, GFP_KERNEL);
568 if (!sta)
569 return -ENOMEM;
570
571 sta->dev = sdata->dev;
572 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN ||
573 sdata->vif.type == IEEE80211_IF_TYPE_AP)
574 ieee80211_send_layer2_update(sta);
575
576 sta->flags = WLAN_STA_AUTH | WLAN_STA_ASSOC;
577
578 sta_apply_parameters(local, sta, params);
579
580 rate_control_rate_init(sta, local);
581
582 sta_info_put(sta);
583
584 return 0;
585}
586
587static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
588 u8 *mac)
589{
590 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
591 struct sta_info *sta;
592
593 if (mac) {
594 /* XXX: get sta belonging to dev */
595 sta = sta_info_get(local, mac);
596 if (!sta)
597 return -ENOENT;
598
599 sta_info_free(sta);
600 sta_info_put(sta);
601 } else
602 sta_info_flush(local, dev);
603
604 return 0;
605}
606
607static int ieee80211_change_station(struct wiphy *wiphy,
608 struct net_device *dev,
609 u8 *mac,
610 struct station_parameters *params)
611{
612 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
613 struct sta_info *sta;
614 struct ieee80211_sub_if_data *vlansdata;
615
616 /* XXX: get sta belonging to dev */
617 sta = sta_info_get(local, mac);
618 if (!sta)
619 return -ENOENT;
620
621 if (params->vlan && params->vlan != sta->dev) {
622 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
623
624 if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
625 vlansdata->vif.type != IEEE80211_IF_TYPE_AP)
626 return -EINVAL;
627
628 sta->dev = params->vlan;
629 ieee80211_send_layer2_update(sta);
630 }
631
632 sta_apply_parameters(local, sta, params);
633
634 sta_info_put(sta);
635
636 return 0;
637}
638
102struct cfg80211_ops mac80211_config_ops = { 639struct cfg80211_ops mac80211_config_ops = {
103 .add_virtual_intf = ieee80211_add_iface, 640 .add_virtual_intf = ieee80211_add_iface,
104 .del_virtual_intf = ieee80211_del_iface, 641 .del_virtual_intf = ieee80211_del_iface,
105 .change_virtual_intf = ieee80211_change_iface, 642 .change_virtual_intf = ieee80211_change_iface,
643 .add_key = ieee80211_add_key,
644 .del_key = ieee80211_del_key,
645 .get_key = ieee80211_get_key,
646 .set_default_key = ieee80211_config_default_key,
647 .add_beacon = ieee80211_add_beacon,
648 .set_beacon = ieee80211_set_beacon,
649 .del_beacon = ieee80211_del_beacon,
650 .add_station = ieee80211_add_station,
651 .del_station = ieee80211_del_station,
652 .change_station = ieee80211_change_station,
653 .get_station = ieee80211_get_station,
106}; 654};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index f0e6ab7eb624..829872a3ae81 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -91,8 +91,7 @@ static const struct file_operations name##_ops = { \
91/* common attributes */ 91/* common attributes */
92IEEE80211_IF_FILE(channel_use, channel_use, DEC); 92IEEE80211_IF_FILE(channel_use, channel_use, DEC);
93IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); 93IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
94IEEE80211_IF_FILE(eapol, eapol, DEC); 94IEEE80211_IF_FILE(ieee802_1x_pac, ieee802_1x_pac, DEC);
95IEEE80211_IF_FILE(ieee8021_x, ieee802_1x, DEC);
96 95
97/* STA/IBSS attributes */ 96/* STA/IBSS attributes */
98IEEE80211_IF_FILE(state, u.sta.state, DEC); 97IEEE80211_IF_FILE(state, u.sta.state, DEC);
@@ -119,13 +118,12 @@ static ssize_t ieee80211_if_fmt_flags(
119 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", 118 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
120 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", 119 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
121 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", 120 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
122 sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : ""); 121 sdata->bss_conf.use_cts_prot ? "CTS prot\n" : "");
123} 122}
124__IEEE80211_IF_FILE(flags); 123__IEEE80211_IF_FILE(flags);
125 124
126/* AP attributes */ 125/* AP attributes */
127IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); 126IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
128IEEE80211_IF_FILE(dtim_period, u.ap.dtim_period, DEC);
129IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); 127IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
130IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC); 128IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
131IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC); 129IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC);
@@ -139,26 +137,6 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast(
139} 137}
140__IEEE80211_IF_FILE(num_buffered_multicast); 138__IEEE80211_IF_FILE(num_buffered_multicast);
141 139
142static ssize_t ieee80211_if_fmt_beacon_head_len(
143 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
144{
145 if (sdata->u.ap.beacon_head)
146 return scnprintf(buf, buflen, "%d\n",
147 sdata->u.ap.beacon_head_len);
148 return scnprintf(buf, buflen, "\n");
149}
150__IEEE80211_IF_FILE(beacon_head_len);
151
152static ssize_t ieee80211_if_fmt_beacon_tail_len(
153 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
154{
155 if (sdata->u.ap.beacon_tail)
156 return scnprintf(buf, buflen, "%d\n",
157 sdata->u.ap.beacon_tail_len);
158 return scnprintf(buf, buflen, "\n");
159}
160__IEEE80211_IF_FILE(beacon_tail_len);
161
162/* WDS attributes */ 140/* WDS attributes */
163IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); 141IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
164 142
@@ -170,8 +148,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
170{ 148{
171 DEBUGFS_ADD(channel_use, sta); 149 DEBUGFS_ADD(channel_use, sta);
172 DEBUGFS_ADD(drop_unencrypted, sta); 150 DEBUGFS_ADD(drop_unencrypted, sta);
173 DEBUGFS_ADD(eapol, sta); 151 DEBUGFS_ADD(ieee802_1x_pac, sta);
174 DEBUGFS_ADD(ieee8021_x, sta);
175 DEBUGFS_ADD(state, sta); 152 DEBUGFS_ADD(state, sta);
176 DEBUGFS_ADD(bssid, sta); 153 DEBUGFS_ADD(bssid, sta);
177 DEBUGFS_ADD(prev_bssid, sta); 154 DEBUGFS_ADD(prev_bssid, sta);
@@ -192,25 +169,20 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
192{ 169{
193 DEBUGFS_ADD(channel_use, ap); 170 DEBUGFS_ADD(channel_use, ap);
194 DEBUGFS_ADD(drop_unencrypted, ap); 171 DEBUGFS_ADD(drop_unencrypted, ap);
195 DEBUGFS_ADD(eapol, ap); 172 DEBUGFS_ADD(ieee802_1x_pac, ap);
196 DEBUGFS_ADD(ieee8021_x, ap);
197 DEBUGFS_ADD(num_sta_ps, ap); 173 DEBUGFS_ADD(num_sta_ps, ap);
198 DEBUGFS_ADD(dtim_period, ap);
199 DEBUGFS_ADD(dtim_count, ap); 174 DEBUGFS_ADD(dtim_count, ap);
200 DEBUGFS_ADD(num_beacons, ap); 175 DEBUGFS_ADD(num_beacons, ap);
201 DEBUGFS_ADD(force_unicast_rateidx, ap); 176 DEBUGFS_ADD(force_unicast_rateidx, ap);
202 DEBUGFS_ADD(max_ratectrl_rateidx, ap); 177 DEBUGFS_ADD(max_ratectrl_rateidx, ap);
203 DEBUGFS_ADD(num_buffered_multicast, ap); 178 DEBUGFS_ADD(num_buffered_multicast, ap);
204 DEBUGFS_ADD(beacon_head_len, ap);
205 DEBUGFS_ADD(beacon_tail_len, ap);
206} 179}
207 180
208static void add_wds_files(struct ieee80211_sub_if_data *sdata) 181static void add_wds_files(struct ieee80211_sub_if_data *sdata)
209{ 182{
210 DEBUGFS_ADD(channel_use, wds); 183 DEBUGFS_ADD(channel_use, wds);
211 DEBUGFS_ADD(drop_unencrypted, wds); 184 DEBUGFS_ADD(drop_unencrypted, wds);
212 DEBUGFS_ADD(eapol, wds); 185 DEBUGFS_ADD(ieee802_1x_pac, wds);
213 DEBUGFS_ADD(ieee8021_x, wds);
214 DEBUGFS_ADD(peer, wds); 186 DEBUGFS_ADD(peer, wds);
215} 187}
216 188
@@ -218,8 +190,7 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
218{ 190{
219 DEBUGFS_ADD(channel_use, vlan); 191 DEBUGFS_ADD(channel_use, vlan);
220 DEBUGFS_ADD(drop_unencrypted, vlan); 192 DEBUGFS_ADD(drop_unencrypted, vlan);
221 DEBUGFS_ADD(eapol, vlan); 193 DEBUGFS_ADD(ieee802_1x_pac, vlan);
222 DEBUGFS_ADD(ieee8021_x, vlan);
223} 194}
224 195
225static void add_monitor_files(struct ieee80211_sub_if_data *sdata) 196static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -231,7 +202,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
231 if (!sdata->debugfsdir) 202 if (!sdata->debugfsdir)
232 return; 203 return;
233 204
234 switch (sdata->type) { 205 switch (sdata->vif.type) {
235 case IEEE80211_IF_TYPE_STA: 206 case IEEE80211_IF_TYPE_STA:
236 case IEEE80211_IF_TYPE_IBSS: 207 case IEEE80211_IF_TYPE_IBSS:
237 add_sta_files(sdata); 208 add_sta_files(sdata);
@@ -263,8 +234,7 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
263{ 234{
264 DEBUGFS_DEL(channel_use, sta); 235 DEBUGFS_DEL(channel_use, sta);
265 DEBUGFS_DEL(drop_unencrypted, sta); 236 DEBUGFS_DEL(drop_unencrypted, sta);
266 DEBUGFS_DEL(eapol, sta); 237 DEBUGFS_DEL(ieee802_1x_pac, sta);
267 DEBUGFS_DEL(ieee8021_x, sta);
268 DEBUGFS_DEL(state, sta); 238 DEBUGFS_DEL(state, sta);
269 DEBUGFS_DEL(bssid, sta); 239 DEBUGFS_DEL(bssid, sta);
270 DEBUGFS_DEL(prev_bssid, sta); 240 DEBUGFS_DEL(prev_bssid, sta);
@@ -285,25 +255,20 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
285{ 255{
286 DEBUGFS_DEL(channel_use, ap); 256 DEBUGFS_DEL(channel_use, ap);
287 DEBUGFS_DEL(drop_unencrypted, ap); 257 DEBUGFS_DEL(drop_unencrypted, ap);
288 DEBUGFS_DEL(eapol, ap); 258 DEBUGFS_DEL(ieee802_1x_pac, ap);
289 DEBUGFS_DEL(ieee8021_x, ap);
290 DEBUGFS_DEL(num_sta_ps, ap); 259 DEBUGFS_DEL(num_sta_ps, ap);
291 DEBUGFS_DEL(dtim_period, ap);
292 DEBUGFS_DEL(dtim_count, ap); 260 DEBUGFS_DEL(dtim_count, ap);
293 DEBUGFS_DEL(num_beacons, ap); 261 DEBUGFS_DEL(num_beacons, ap);
294 DEBUGFS_DEL(force_unicast_rateidx, ap); 262 DEBUGFS_DEL(force_unicast_rateidx, ap);
295 DEBUGFS_DEL(max_ratectrl_rateidx, ap); 263 DEBUGFS_DEL(max_ratectrl_rateidx, ap);
296 DEBUGFS_DEL(num_buffered_multicast, ap); 264 DEBUGFS_DEL(num_buffered_multicast, ap);
297 DEBUGFS_DEL(beacon_head_len, ap);
298 DEBUGFS_DEL(beacon_tail_len, ap);
299} 265}
300 266
301static void del_wds_files(struct ieee80211_sub_if_data *sdata) 267static void del_wds_files(struct ieee80211_sub_if_data *sdata)
302{ 268{
303 DEBUGFS_DEL(channel_use, wds); 269 DEBUGFS_DEL(channel_use, wds);
304 DEBUGFS_DEL(drop_unencrypted, wds); 270 DEBUGFS_DEL(drop_unencrypted, wds);
305 DEBUGFS_DEL(eapol, wds); 271 DEBUGFS_DEL(ieee802_1x_pac, wds);
306 DEBUGFS_DEL(ieee8021_x, wds);
307 DEBUGFS_DEL(peer, wds); 272 DEBUGFS_DEL(peer, wds);
308} 273}
309 274
@@ -311,8 +276,7 @@ static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
311{ 276{
312 DEBUGFS_DEL(channel_use, vlan); 277 DEBUGFS_DEL(channel_use, vlan);
313 DEBUGFS_DEL(drop_unencrypted, vlan); 278 DEBUGFS_DEL(drop_unencrypted, vlan);
314 DEBUGFS_DEL(eapol, vlan); 279 DEBUGFS_DEL(ieee802_1x_pac, vlan);
315 DEBUGFS_DEL(ieee8021_x, vlan);
316} 280}
317 281
318static void del_monitor_files(struct ieee80211_sub_if_data *sdata) 282static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -362,7 +326,7 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
362 326
363void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) 327void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
364{ 328{
365 del_files(sdata, sdata->type); 329 del_files(sdata, sdata->vif.type);
366 debugfs_remove(sdata->debugfsdir); 330 debugfs_remove(sdata->debugfsdir);
367 sdata->debugfsdir = NULL; 331 sdata->debugfsdir = NULL;
368} 332}
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index f484ca7ade9c..5dcc2d61551f 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -34,6 +34,8 @@
34#include "debugfs.h" 34#include "debugfs.h"
35#include "debugfs_netdev.h" 35#include "debugfs_netdev.h"
36 36
37#define SUPP_MCS_SET_LEN 16
38
37/* 39/*
38 * For seeing transmitted packets on monitor interfaces 40 * For seeing transmitted packets on monitor interfaces
39 * we have a radiotap header too. 41 * we have a radiotap header too.
@@ -175,21 +177,21 @@ static int ieee80211_open(struct net_device *dev)
175 /* 177 /*
176 * check whether it may have the same address 178 * check whether it may have the same address
177 */ 179 */
178 if (!identical_mac_addr_allowed(sdata->type, 180 if (!identical_mac_addr_allowed(sdata->vif.type,
179 nsdata->type)) 181 nsdata->vif.type))
180 return -ENOTUNIQ; 182 return -ENOTUNIQ;
181 183
182 /* 184 /*
183 * can only add VLANs to enabled APs 185 * can only add VLANs to enabled APs
184 */ 186 */
185 if (sdata->type == IEEE80211_IF_TYPE_VLAN && 187 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN &&
186 nsdata->type == IEEE80211_IF_TYPE_AP && 188 nsdata->vif.type == IEEE80211_IF_TYPE_AP &&
187 netif_running(nsdata->dev)) 189 netif_running(nsdata->dev))
188 sdata->u.vlan.ap = nsdata; 190 sdata->u.vlan.ap = nsdata;
189 } 191 }
190 } 192 }
191 193
192 switch (sdata->type) { 194 switch (sdata->vif.type) {
193 case IEEE80211_IF_TYPE_WDS: 195 case IEEE80211_IF_TYPE_WDS:
194 if (is_zero_ether_addr(sdata->u.wds.remote_addr)) 196 if (is_zero_ether_addr(sdata->u.wds.remote_addr))
195 return -ENOLINK; 197 return -ENOLINK;
@@ -216,9 +218,11 @@ static int ieee80211_open(struct net_device *dev)
216 res = local->ops->start(local_to_hw(local)); 218 res = local->ops->start(local_to_hw(local));
217 if (res) 219 if (res)
218 return res; 220 return res;
221 ieee80211_hw_config(local);
222 ieee80211_led_radio(local, local->hw.conf.radio_enabled);
219 } 223 }
220 224
221 switch (sdata->type) { 225 switch (sdata->vif.type) {
222 case IEEE80211_IF_TYPE_VLAN: 226 case IEEE80211_IF_TYPE_VLAN:
223 list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); 227 list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans);
224 /* no need to tell driver */ 228 /* no need to tell driver */
@@ -232,7 +236,6 @@ static int ieee80211_open(struct net_device *dev)
232 netif_tx_unlock_bh(local->mdev); 236 netif_tx_unlock_bh(local->mdev);
233 237
234 local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; 238 local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
235 ieee80211_hw_config(local);
236 } 239 }
237 break; 240 break;
238 case IEEE80211_IF_TYPE_STA: 241 case IEEE80211_IF_TYPE_STA:
@@ -240,8 +243,8 @@ static int ieee80211_open(struct net_device *dev)
240 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; 243 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
241 /* fall through */ 244 /* fall through */
242 default: 245 default:
243 conf.if_id = dev->ifindex; 246 conf.vif = &sdata->vif;
244 conf.type = sdata->type; 247 conf.type = sdata->vif.type;
245 conf.mac_addr = dev->dev_addr; 248 conf.mac_addr = dev->dev_addr;
246 res = local->ops->add_interface(local_to_hw(local), &conf); 249 res = local->ops->add_interface(local_to_hw(local), &conf);
247 if (res && !local->open_count && local->ops->stop) 250 if (res && !local->open_count && local->ops->stop)
@@ -253,7 +256,7 @@ static int ieee80211_open(struct net_device *dev)
253 ieee80211_reset_erp_info(dev); 256 ieee80211_reset_erp_info(dev);
254 ieee80211_enable_keys(sdata); 257 ieee80211_enable_keys(sdata);
255 258
256 if (sdata->type == IEEE80211_IF_TYPE_STA && 259 if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
257 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) 260 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
258 netif_carrier_off(dev); 261 netif_carrier_off(dev);
259 else 262 else
@@ -267,6 +270,17 @@ static int ieee80211_open(struct net_device *dev)
267 tasklet_enable(&local->tasklet); 270 tasklet_enable(&local->tasklet);
268 } 271 }
269 272
273 /*
274 * set_multicast_list will be invoked by the networking core
275 * which will check whether any increments here were done in
276 * error and sync them down to the hardware as filter flags.
277 */
278 if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
279 atomic_inc(&local->iff_allmultis);
280
281 if (sdata->flags & IEEE80211_SDATA_PROMISC)
282 atomic_inc(&local->iff_promiscs);
283
270 local->open_count++; 284 local->open_count++;
271 285
272 netif_start_queue(dev); 286 netif_start_queue(dev);
@@ -279,17 +293,47 @@ static int ieee80211_stop(struct net_device *dev)
279 struct ieee80211_sub_if_data *sdata; 293 struct ieee80211_sub_if_data *sdata;
280 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 294 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
281 struct ieee80211_if_init_conf conf; 295 struct ieee80211_if_init_conf conf;
296 struct sta_info *sta;
297 int i;
282 298
283 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 299 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
284 300
301 list_for_each_entry(sta, &local->sta_list, list) {
302 if (sta->dev == dev)
303 for (i = 0; i < STA_TID_NUM; i++)
304 ieee80211_sta_stop_rx_ba_session(sta->dev,
305 sta->addr, i,
306 WLAN_BACK_RECIPIENT,
307 WLAN_REASON_QSTA_LEAVE_QBSS);
308 }
309
285 netif_stop_queue(dev); 310 netif_stop_queue(dev);
286 311
312 /*
313 * Don't count this interface for promisc/allmulti while it
314 * is down. dev_mc_unsync() will invoke set_multicast_list
315 * on the master interface which will sync these down to the
316 * hardware as filter flags.
317 */
318 if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
319 atomic_dec(&local->iff_allmultis);
320
321 if (sdata->flags & IEEE80211_SDATA_PROMISC)
322 atomic_dec(&local->iff_promiscs);
323
287 dev_mc_unsync(local->mdev, dev); 324 dev_mc_unsync(local->mdev, dev);
288 325
289 /* down all dependent devices, that is VLANs */ 326 /* APs need special treatment */
290 if (sdata->type == IEEE80211_IF_TYPE_AP) { 327 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
291 struct ieee80211_sub_if_data *vlan, *tmp; 328 struct ieee80211_sub_if_data *vlan, *tmp;
329 struct beacon_data *old_beacon = sdata->u.ap.beacon;
330
331 /* remove beacon */
332 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
333 synchronize_rcu();
334 kfree(old_beacon);
292 335
336 /* down all dependent devices, that is VLANs */
293 list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, 337 list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
294 u.vlan.list) 338 u.vlan.list)
295 dev_close(vlan->dev); 339 dev_close(vlan->dev);
@@ -298,7 +342,7 @@ static int ieee80211_stop(struct net_device *dev)
298 342
299 local->open_count--; 343 local->open_count--;
300 344
301 switch (sdata->type) { 345 switch (sdata->vif.type) {
302 case IEEE80211_IF_TYPE_VLAN: 346 case IEEE80211_IF_TYPE_VLAN:
303 list_del(&sdata->u.vlan.list); 347 list_del(&sdata->u.vlan.list);
304 sdata->u.vlan.ap = NULL; 348 sdata->u.vlan.ap = NULL;
@@ -311,8 +355,7 @@ static int ieee80211_stop(struct net_device *dev)
311 ieee80211_configure_filter(local); 355 ieee80211_configure_filter(local);
312 netif_tx_unlock_bh(local->mdev); 356 netif_tx_unlock_bh(local->mdev);
313 357
314 local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; 358 local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
315 ieee80211_hw_config(local);
316 } 359 }
317 break; 360 break;
318 case IEEE80211_IF_TYPE_STA: 361 case IEEE80211_IF_TYPE_STA:
@@ -328,16 +371,24 @@ static int ieee80211_stop(struct net_device *dev)
328 synchronize_rcu(); 371 synchronize_rcu();
329 skb_queue_purge(&sdata->u.sta.skb_queue); 372 skb_queue_purge(&sdata->u.sta.skb_queue);
330 373
331 if (!local->ops->hw_scan && 374 if (local->scan_dev == sdata->dev) {
332 local->scan_dev == sdata->dev) { 375 if (!local->ops->hw_scan) {
333 local->sta_scanning = 0; 376 local->sta_sw_scanning = 0;
334 cancel_delayed_work(&local->scan_work); 377 cancel_delayed_work(&local->scan_work);
378 } else
379 local->sta_hw_scanning = 0;
335 } 380 }
381
336 flush_workqueue(local->hw.workqueue); 382 flush_workqueue(local->hw.workqueue);
383
384 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
385 kfree(sdata->u.sta.extra_ie);
386 sdata->u.sta.extra_ie = NULL;
387 sdata->u.sta.extra_ie_len = 0;
337 /* fall through */ 388 /* fall through */
338 default: 389 default:
339 conf.if_id = dev->ifindex; 390 conf.vif = &sdata->vif;
340 conf.type = sdata->type; 391 conf.type = sdata->vif.type;
341 conf.mac_addr = dev->dev_addr; 392 conf.mac_addr = dev->dev_addr;
342 /* disable all keys for as long as this netdev is down */ 393 /* disable all keys for as long as this netdev is down */
343 ieee80211_disable_keys(sdata); 394 ieee80211_disable_keys(sdata);
@@ -351,6 +402,8 @@ static int ieee80211_stop(struct net_device *dev)
351 if (local->ops->stop) 402 if (local->ops->stop)
352 local->ops->stop(local_to_hw(local)); 403 local->ops->stop(local_to_hw(local));
353 404
405 ieee80211_led_radio(local, 0);
406
354 tasklet_disable(&local->tx_pending_tasklet); 407 tasklet_disable(&local->tx_pending_tasklet);
355 tasklet_disable(&local->tasklet); 408 tasklet_disable(&local->tasklet);
356 } 409 }
@@ -366,8 +419,8 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
366 419
367 allmulti = !!(dev->flags & IFF_ALLMULTI); 420 allmulti = !!(dev->flags & IFF_ALLMULTI);
368 promisc = !!(dev->flags & IFF_PROMISC); 421 promisc = !!(dev->flags & IFF_PROMISC);
369 sdata_allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; 422 sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
370 sdata_promisc = sdata->flags & IEEE80211_SDATA_PROMISC; 423 sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
371 424
372 if (allmulti != sdata_allmulti) { 425 if (allmulti != sdata_allmulti) {
373 if (dev->flags & IFF_ALLMULTI) 426 if (dev->flags & IFF_ALLMULTI)
@@ -400,7 +453,6 @@ static const struct header_ops ieee80211_header_ops = {
400void ieee80211_if_setup(struct net_device *dev) 453void ieee80211_if_setup(struct net_device *dev)
401{ 454{
402 ether_setup(dev); 455 ether_setup(dev);
403 dev->header_ops = &ieee80211_header_ops;
404 dev->hard_start_xmit = ieee80211_subif_start_xmit; 456 dev->hard_start_xmit = ieee80211_subif_start_xmit;
405 dev->wireless_handlers = &ieee80211_iw_handler_def; 457 dev->wireless_handlers = &ieee80211_iw_handler_def;
406 dev->set_multicast_list = ieee80211_set_multicast_list; 458 dev->set_multicast_list = ieee80211_set_multicast_list;
@@ -459,20 +511,20 @@ static int __ieee80211_if_config(struct net_device *dev,
459 return 0; 511 return 0;
460 512
461 memset(&conf, 0, sizeof(conf)); 513 memset(&conf, 0, sizeof(conf));
462 conf.type = sdata->type; 514 conf.type = sdata->vif.type;
463 if (sdata->type == IEEE80211_IF_TYPE_STA || 515 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
464 sdata->type == IEEE80211_IF_TYPE_IBSS) { 516 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
465 conf.bssid = sdata->u.sta.bssid; 517 conf.bssid = sdata->u.sta.bssid;
466 conf.ssid = sdata->u.sta.ssid; 518 conf.ssid = sdata->u.sta.ssid;
467 conf.ssid_len = sdata->u.sta.ssid_len; 519 conf.ssid_len = sdata->u.sta.ssid_len;
468 } else if (sdata->type == IEEE80211_IF_TYPE_AP) { 520 } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
469 conf.ssid = sdata->u.ap.ssid; 521 conf.ssid = sdata->u.ap.ssid;
470 conf.ssid_len = sdata->u.ap.ssid_len; 522 conf.ssid_len = sdata->u.ap.ssid_len;
471 conf.beacon = beacon; 523 conf.beacon = beacon;
472 conf.beacon_control = control; 524 conf.beacon_control = control;
473 } 525 }
474 return local->ops->config_interface(local_to_hw(local), 526 return local->ops->config_interface(local_to_hw(local),
475 dev->ifindex, &conf); 527 &sdata->vif, &conf);
476} 528}
477 529
478int ieee80211_if_config(struct net_device *dev) 530int ieee80211_if_config(struct net_device *dev)
@@ -484,11 +536,13 @@ int ieee80211_if_config_beacon(struct net_device *dev)
484{ 536{
485 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 537 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
486 struct ieee80211_tx_control control; 538 struct ieee80211_tx_control control;
539 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
487 struct sk_buff *skb; 540 struct sk_buff *skb;
488 541
489 if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) 542 if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
490 return 0; 543 return 0;
491 skb = ieee80211_beacon_get(local_to_hw(local), dev->ifindex, &control); 544 skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif,
545 &control);
492 if (!skb) 546 if (!skb)
493 return -ENOMEM; 547 return -ENOMEM;
494 return __ieee80211_if_config(dev, skb, &control); 548 return __ieee80211_if_config(dev, skb, &control);
@@ -500,7 +554,7 @@ int ieee80211_hw_config(struct ieee80211_local *local)
500 struct ieee80211_channel *chan; 554 struct ieee80211_channel *chan;
501 int ret = 0; 555 int ret = 0;
502 556
503 if (local->sta_scanning) { 557 if (local->sta_sw_scanning) {
504 chan = local->scan_channel; 558 chan = local->scan_channel;
505 mode = local->scan_hw_mode; 559 mode = local->scan_hw_mode;
506 } else { 560 } else {
@@ -534,25 +588,79 @@ int ieee80211_hw_config(struct ieee80211_local *local)
534 return ret; 588 return ret;
535} 589}
536 590
537void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes) 591/**
592 * ieee80211_hw_config_ht should be used only after legacy configuration
593 * has been determined, as ht configuration depends upon the hardware's
594 * HT abilities for a _specific_ band.
595 */
596int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
597 struct ieee80211_ht_info *req_ht_cap,
598 struct ieee80211_ht_bss_info *req_bss_cap)
538{ 599{
539 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 600 struct ieee80211_conf *conf = &local->hw.conf;
540 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 601 struct ieee80211_hw_mode *mode = conf->mode;
541 if (local->ops->erp_ie_changed) 602 int i;
542 local->ops->erp_ie_changed(local_to_hw(local), changes, 603
543 !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION), 604 /* HT is not supported */
544 !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)); 605 if (!mode->ht_info.ht_supported) {
606 conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
607 return -EOPNOTSUPP;
608 }
609
610 /* disable HT */
611 if (!enable_ht) {
612 conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
613 } else {
614 conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
615 conf->ht_conf.cap = req_ht_cap->cap & mode->ht_info.cap;
616 conf->ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
617 conf->ht_conf.cap |=
618 mode->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
619 conf->ht_bss_conf.primary_channel =
620 req_bss_cap->primary_channel;
621 conf->ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
622 conf->ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
623 for (i = 0; i < SUPP_MCS_SET_LEN; i++)
624 conf->ht_conf.supp_mcs_set[i] =
625 mode->ht_info.supp_mcs_set[i] &
626 req_ht_cap->supp_mcs_set[i];
627
628 /* In STA mode, this gives us indication
629 * to the AP's mode of operation */
630 conf->ht_conf.ht_supported = 1;
631 conf->ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
632 conf->ht_conf.ampdu_density = req_ht_cap->ampdu_density;
633 }
634
635 local->ops->conf_ht(local_to_hw(local), &local->hw.conf);
636
637 return 0;
638}
639
640void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
641 u32 changed)
642{
643 struct ieee80211_local *local = sdata->local;
644
645 if (!changed)
646 return;
647
648 if (local->ops->bss_info_changed)
649 local->ops->bss_info_changed(local_to_hw(local),
650 &sdata->vif,
651 &sdata->bss_conf,
652 changed);
545} 653}
546 654
547void ieee80211_reset_erp_info(struct net_device *dev) 655void ieee80211_reset_erp_info(struct net_device *dev)
548{ 656{
549 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 657 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
550 658
551 sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION | 659 sdata->bss_conf.use_cts_prot = 0;
552 IEEE80211_SDATA_SHORT_PREAMBLE); 660 sdata->bss_conf.use_short_preamble = 0;
553 ieee80211_erp_info_change_notify(dev, 661 ieee80211_bss_info_change_notify(sdata,
554 IEEE80211_ERP_CHANGE_PROTECTION | 662 BSS_CHANGED_ERP_CTS_PROT |
555 IEEE80211_ERP_CHANGE_PREAMBLE); 663 BSS_CHANGED_ERP_PREAMBLE);
556} 664}
557 665
558void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, 666void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
@@ -609,7 +717,7 @@ static void ieee80211_tasklet_handler(unsigned long data)
609 case IEEE80211_RX_MSG: 717 case IEEE80211_RX_MSG:
610 /* status is in skb->cb */ 718 /* status is in skb->cb */
611 memcpy(&rx_status, skb->cb, sizeof(rx_status)); 719 memcpy(&rx_status, skb->cb, sizeof(rx_status));
612 /* Clear skb->type in order to not confuse kernel 720 /* Clear skb->pkt_type in order to not confuse kernel
613 * netstack. */ 721 * netstack. */
614 skb->pkt_type = 0; 722 skb->pkt_type = 0;
615 __ieee80211_rx(local_to_hw(local), skb, &rx_status); 723 __ieee80211_rx(local_to_hw(local), skb, &rx_status);
@@ -644,7 +752,7 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
644 struct ieee80211_tx_packet_data *pkt_data; 752 struct ieee80211_tx_packet_data *pkt_data;
645 753
646 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; 754 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
647 pkt_data->ifindex = control->ifindex; 755 pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex;
648 pkt_data->flags = 0; 756 pkt_data->flags = 0;
649 if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) 757 if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS)
650 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; 758 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
@@ -652,6 +760,8 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
652 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; 760 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
653 if (control->flags & IEEE80211_TXCTL_REQUEUE) 761 if (control->flags & IEEE80211_TXCTL_REQUEUE)
654 pkt_data->flags |= IEEE80211_TXPD_REQUEUE; 762 pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
763 if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME)
764 pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
655 pkt_data->queue = control->queue; 765 pkt_data->queue = control->queue;
656 766
657 hdrlen = ieee80211_get_hdrlen_from_skb(skb); 767 hdrlen = ieee80211_get_hdrlen_from_skb(skb);
@@ -779,10 +889,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
779 sta_info_put(sta); 889 sta_info_put(sta);
780 return; 890 return;
781 } 891 }
782 } else { 892 } else
783 /* FIXME: STUPID to call this with both local and local->mdev */ 893 rate_control_tx_status(local->mdev, skb, status);
784 rate_control_tx_status(local, local->mdev, skb, status);
785 }
786 894
787 ieee80211_led_tx(local, 0); 895 ieee80211_led_tx(local, 0);
788 896
@@ -868,7 +976,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
868 if (!monitors || !skb) 976 if (!monitors || !skb)
869 goto out; 977 goto out;
870 978
871 if (sdata->type == IEEE80211_IF_TYPE_MNTR) { 979 if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) {
872 if (!netif_running(sdata->dev)) 980 if (!netif_running(sdata->dev))
873 continue; 981 continue;
874 monitors--; 982 monitors--;
@@ -990,7 +1098,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
990 mdev->header_ops = &ieee80211_header_ops; 1098 mdev->header_ops = &ieee80211_header_ops;
991 mdev->set_multicast_list = ieee80211_master_set_multicast_list; 1099 mdev->set_multicast_list = ieee80211_master_set_multicast_list;
992 1100
993 sdata->type = IEEE80211_IF_TYPE_AP; 1101 sdata->vif.type = IEEE80211_IF_TYPE_AP;
994 sdata->dev = mdev; 1102 sdata->dev = mdev;
995 sdata->local = local; 1103 sdata->local = local;
996 sdata->u.ap.force_unicast_rateidx = -1; 1104 sdata->u.ap.force_unicast_rateidx = -1;
@@ -1072,7 +1180,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1072 ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); 1180 ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
1073 ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); 1181 ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP);
1074 1182
1075 result = ieee80211_init_rate_ctrl_alg(local, NULL); 1183 result = ieee80211_init_rate_ctrl_alg(local,
1184 hw->rate_control_algorithm);
1076 if (result < 0) { 1185 if (result < 0) {
1077 printk(KERN_DEBUG "%s: Failed to initialize rate control " 1186 printk(KERN_DEBUG "%s: Failed to initialize rate control "
1078 "algorithm\n", wiphy_name(local->hw.wiphy)); 1187 "algorithm\n", wiphy_name(local->hw.wiphy));
@@ -1233,21 +1342,39 @@ static int __init ieee80211_init(void)
1233 1342
1234 BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb)); 1343 BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
1235 1344
1345 ret = rc80211_simple_init();
1346 if (ret)
1347 goto fail;
1348
1349 ret = rc80211_pid_init();
1350 if (ret)
1351 goto fail_simple;
1352
1236 ret = ieee80211_wme_register(); 1353 ret = ieee80211_wme_register();
1237 if (ret) { 1354 if (ret) {
1238 printk(KERN_DEBUG "ieee80211_init: failed to " 1355 printk(KERN_DEBUG "ieee80211_init: failed to "
1239 "initialize WME (err=%d)\n", ret); 1356 "initialize WME (err=%d)\n", ret);
1240 return ret; 1357 goto fail_pid;
1241 } 1358 }
1242 1359
1243 ieee80211_debugfs_netdev_init(); 1360 ieee80211_debugfs_netdev_init();
1244 ieee80211_regdomain_init(); 1361 ieee80211_regdomain_init();
1245 1362
1246 return 0; 1363 return 0;
1364
1365 fail_pid:
1366 rc80211_simple_exit();
1367 fail_simple:
1368 rc80211_pid_exit();
1369 fail:
1370 return ret;
1247} 1371}
1248 1372
1249static void __exit ieee80211_exit(void) 1373static void __exit ieee80211_exit(void)
1250{ 1374{
1375 rc80211_simple_exit();
1376 rc80211_pid_exit();
1377
1251 ieee80211_wme_unregister(); 1378 ieee80211_wme_unregister();
1252 ieee80211_debugfs_netdev_exit(); 1379 ieee80211_debugfs_netdev_exit();
1253} 1380}
diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h
deleted file mode 100644
index c15295d43d87..000000000000
--- a/net/mac80211/ieee80211_common.h
+++ /dev/null
@@ -1,91 +0,0 @@
1/*
2 * IEEE 802.11 driver (80211.o) -- hostapd interface
3 * Copyright 2002-2004, Instant802 Networks, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef IEEE80211_COMMON_H
11#define IEEE80211_COMMON_H
12
13#include <linux/types.h>
14
15/*
16 * This is common header information with user space. It is used on all
17 * frames sent to wlan#ap interface.
18 */
19
20#define IEEE80211_FI_VERSION 0x80211001
21
22struct ieee80211_frame_info {
23 __be32 version;
24 __be32 length;
25 __be64 mactime;
26 __be64 hosttime;
27 __be32 phytype;
28 __be32 channel;
29 __be32 datarate;
30 __be32 antenna;
31 __be32 priority;
32 __be32 ssi_type;
33 __be32 ssi_signal;
34 __be32 ssi_noise;
35 __be32 preamble;
36 __be32 encoding;
37
38 /* Note: this structure is otherwise identical to capture format used
39 * in linux-wlan-ng, but this additional field is used to provide meta
40 * data about the frame to hostapd. This was the easiest method for
41 * providing this information, but this might change in the future. */
42 __be32 msg_type;
43} __attribute__ ((packed));
44
45
46enum ieee80211_msg_type {
47 ieee80211_msg_normal = 0,
48 ieee80211_msg_tx_callback_ack = 1,
49 ieee80211_msg_tx_callback_fail = 2,
50 /* hole at 3, was ieee80211_msg_passive_scan but unused */
51 /* hole at 4, was ieee80211_msg_wep_frame_unknown_key but now unused */
52 ieee80211_msg_michael_mic_failure = 5,
53 /* hole at 6, was monitor but never sent to userspace */
54 ieee80211_msg_sta_not_assoc = 7,
55 /* 8 was ieee80211_msg_set_aid_for_sta */
56 /* 9 was ieee80211_msg_key_threshold_notification */
57 /* 11 was ieee80211_msg_radar */
58};
59
60struct ieee80211_msg_key_notification {
61 int tx_rx_count;
62 char ifname[IFNAMSIZ];
63 u8 addr[ETH_ALEN]; /* ff:ff:ff:ff:ff:ff for broadcast keys */
64};
65
66
67enum ieee80211_phytype {
68 ieee80211_phytype_fhss_dot11_97 = 1,
69 ieee80211_phytype_dsss_dot11_97 = 2,
70 ieee80211_phytype_irbaseband = 3,
71 ieee80211_phytype_dsss_dot11_b = 4,
72 ieee80211_phytype_pbcc_dot11_b = 5,
73 ieee80211_phytype_ofdm_dot11_g = 6,
74 ieee80211_phytype_pbcc_dot11_g = 7,
75 ieee80211_phytype_ofdm_dot11_a = 8,
76};
77
78enum ieee80211_ssi_type {
79 ieee80211_ssi_none = 0,
80 ieee80211_ssi_norm = 1, /* normalized, 0-1000 */
81 ieee80211_ssi_dbm = 2,
82 ieee80211_ssi_raw = 3, /* raw SSI */
83};
84
85struct ieee80211_radar_info {
86 int channel;
87 int radar;
88 int radar_type;
89};
90
91#endif /* IEEE80211_COMMON_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4b4ed2a5803c..72ecbf7bf962 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -89,6 +89,8 @@ struct ieee80211_sta_bss {
89 size_t rsn_ie_len; 89 size_t rsn_ie_len;
90 u8 *wmm_ie; 90 u8 *wmm_ie;
91 size_t wmm_ie_len; 91 size_t wmm_ie_len;
92 u8 *ht_ie;
93 size_t ht_ie_len;
92#define IEEE80211_MAX_SUPP_RATES 32 94#define IEEE80211_MAX_SUPP_RATES 32
93 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 95 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
94 size_t supp_rates_len; 96 size_t supp_rates_len;
@@ -121,6 +123,7 @@ typedef enum {
121/* frame is destined to interface currently processed (incl. multicast frames) */ 123/* frame is destined to interface currently processed (incl. multicast frames) */
122#define IEEE80211_TXRXD_RXRA_MATCH BIT(5) 124#define IEEE80211_TXRXD_RXRA_MATCH BIT(5)
123#define IEEE80211_TXRXD_TX_INJECTED BIT(6) 125#define IEEE80211_TXRXD_TX_INJECTED BIT(6)
126#define IEEE80211_TXRXD_RX_AMSDU BIT(7)
124struct ieee80211_txrx_data { 127struct ieee80211_txrx_data {
125 struct sk_buff *skb; 128 struct sk_buff *skb;
126 struct net_device *dev; 129 struct net_device *dev;
@@ -161,6 +164,7 @@ struct ieee80211_txrx_data {
161#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) 164#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0)
162#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) 165#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1)
163#define IEEE80211_TXPD_REQUEUE BIT(2) 166#define IEEE80211_TXPD_REQUEUE BIT(2)
167#define IEEE80211_TXPD_EAPOL_FRAME BIT(3)
164/* Stored in sk_buff->cb */ 168/* Stored in sk_buff->cb */
165struct ieee80211_tx_packet_data { 169struct ieee80211_tx_packet_data {
166 int ifindex; 170 int ifindex;
@@ -186,9 +190,14 @@ typedef ieee80211_txrx_result (*ieee80211_tx_handler)
186typedef ieee80211_txrx_result (*ieee80211_rx_handler) 190typedef ieee80211_txrx_result (*ieee80211_rx_handler)
187(struct ieee80211_txrx_data *rx); 191(struct ieee80211_txrx_data *rx);
188 192
193struct beacon_data {
194 u8 *head, *tail;
195 int head_len, tail_len;
196 int dtim_period;
197};
198
189struct ieee80211_if_ap { 199struct ieee80211_if_ap {
190 u8 *beacon_head, *beacon_tail; 200 struct beacon_data *beacon;
191 int beacon_head_len, beacon_tail_len;
192 201
193 struct list_head vlans; 202 struct list_head vlans;
194 203
@@ -201,7 +210,7 @@ struct ieee80211_if_ap {
201 u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)]; 210 u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)];
202 atomic_t num_sta_ps; /* number of stations in PS mode */ 211 atomic_t num_sta_ps; /* number of stations in PS mode */
203 struct sk_buff_head ps_bc_buf; 212 struct sk_buff_head ps_bc_buf;
204 int dtim_period, dtim_count; 213 int dtim_count;
205 int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ 214 int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
206 int max_ratectrl_rateidx; /* max TX rateidx for rate control */ 215 int max_ratectrl_rateidx; /* max TX rateidx for rate control */
207 int num_beacons; /* number of TXed beacon frames for this BSS */ 216 int num_beacons; /* number of TXed beacon frames for this BSS */
@@ -230,6 +239,7 @@ struct ieee80211_if_vlan {
230#define IEEE80211_STA_AUTO_SSID_SEL BIT(10) 239#define IEEE80211_STA_AUTO_SSID_SEL BIT(10)
231#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) 240#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11)
232#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) 241#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12)
242#define IEEE80211_STA_PRIVACY_INVOKED BIT(13)
233struct ieee80211_if_sta { 243struct ieee80211_if_sta {
234 enum { 244 enum {
235 IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, 245 IEEE80211_DISABLED, IEEE80211_AUTHENTICATE,
@@ -241,6 +251,8 @@ struct ieee80211_if_sta {
241 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; 251 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
242 u8 ssid[IEEE80211_MAX_SSID_LEN]; 252 u8 ssid[IEEE80211_MAX_SSID_LEN];
243 size_t ssid_len; 253 size_t ssid_len;
254 u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
255 size_t scan_ssid_len;
244 u16 aid; 256 u16 aid;
245 u16 ap_capab, capab; 257 u16 ap_capab, capab;
246 u8 *extra_ie; /* to be added to the end of AssocReq */ 258 u8 *extra_ie; /* to be added to the end of AssocReq */
@@ -259,7 +271,6 @@ struct ieee80211_if_sta {
259 unsigned long request; 271 unsigned long request;
260 struct sk_buff_head skb_queue; 272 struct sk_buff_head skb_queue;
261 273
262 int key_management_enabled;
263 unsigned long last_probe; 274 unsigned long last_probe;
264 275
265#define IEEE80211_AUTH_ALG_OPEN BIT(0) 276#define IEEE80211_AUTH_ALG_OPEN BIT(0)
@@ -280,15 +291,9 @@ struct ieee80211_if_sta {
280/* flags used in struct ieee80211_sub_if_data.flags */ 291/* flags used in struct ieee80211_sub_if_data.flags */
281#define IEEE80211_SDATA_ALLMULTI BIT(0) 292#define IEEE80211_SDATA_ALLMULTI BIT(0)
282#define IEEE80211_SDATA_PROMISC BIT(1) 293#define IEEE80211_SDATA_PROMISC BIT(1)
283#define IEEE80211_SDATA_USE_PROTECTION BIT(2) /* CTS protect ERP frames */ 294#define IEEE80211_SDATA_USERSPACE_MLME BIT(2)
284/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon
285 * generator reports that there are no present stations that cannot support short
286 * preambles */
287#define IEEE80211_SDATA_SHORT_PREAMBLE BIT(3)
288#define IEEE80211_SDATA_USERSPACE_MLME BIT(4)
289struct ieee80211_sub_if_data { 295struct ieee80211_sub_if_data {
290 struct list_head list; 296 struct list_head list;
291 enum ieee80211_if_types type;
292 297
293 struct wireless_dev wdev; 298 struct wireless_dev wdev;
294 299
@@ -301,11 +306,11 @@ struct ieee80211_sub_if_data {
301 unsigned int flags; 306 unsigned int flags;
302 307
303 int drop_unencrypted; 308 int drop_unencrypted;
304 int eapol; /* 0 = process EAPOL frames as normal data frames, 309 /*
305 * 1 = send EAPOL frames through wlan#ap to hostapd 310 * IEEE 802.1X Port access control in effect,
306 * (default) */ 311 * drop packets to/from unauthorized port
307 int ieee802_1x; /* IEEE 802.1X PAE - drop packet to/from unauthorized 312 */
308 * port */ 313 int ieee802_1x_pac;
309 314
310 u16 sequence; 315 u16 sequence;
311 316
@@ -317,6 +322,15 @@ struct ieee80211_sub_if_data {
317 struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; 322 struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
318 struct ieee80211_key *default_key; 323 struct ieee80211_key *default_key;
319 324
325 /*
326 * BSS configuration for this interface.
327 *
328 * FIXME: I feel bad putting this here when we already have a
329 * bss pointer, but the bss pointer is just wrong when
330 * you have multiple virtual STA mode interfaces...
331 * This needs to be fixed.
332 */
333 struct ieee80211_bss_conf bss_conf;
320 struct ieee80211_if_ap *bss; /* BSS that this device belongs to */ 334 struct ieee80211_if_ap *bss; /* BSS that this device belongs to */
321 335
322 union { 336 union {
@@ -334,8 +348,7 @@ struct ieee80211_sub_if_data {
334 struct { 348 struct {
335 struct dentry *channel_use; 349 struct dentry *channel_use;
336 struct dentry *drop_unencrypted; 350 struct dentry *drop_unencrypted;
337 struct dentry *eapol; 351 struct dentry *ieee802_1x_pac;
338 struct dentry *ieee8021_x;
339 struct dentry *state; 352 struct dentry *state;
340 struct dentry *bssid; 353 struct dentry *bssid;
341 struct dentry *prev_bssid; 354 struct dentry *prev_bssid;
@@ -354,30 +367,24 @@ struct ieee80211_sub_if_data {
354 struct { 367 struct {
355 struct dentry *channel_use; 368 struct dentry *channel_use;
356 struct dentry *drop_unencrypted; 369 struct dentry *drop_unencrypted;
357 struct dentry *eapol; 370 struct dentry *ieee802_1x_pac;
358 struct dentry *ieee8021_x;
359 struct dentry *num_sta_ps; 371 struct dentry *num_sta_ps;
360 struct dentry *dtim_period;
361 struct dentry *dtim_count; 372 struct dentry *dtim_count;
362 struct dentry *num_beacons; 373 struct dentry *num_beacons;
363 struct dentry *force_unicast_rateidx; 374 struct dentry *force_unicast_rateidx;
364 struct dentry *max_ratectrl_rateidx; 375 struct dentry *max_ratectrl_rateidx;
365 struct dentry *num_buffered_multicast; 376 struct dentry *num_buffered_multicast;
366 struct dentry *beacon_head_len;
367 struct dentry *beacon_tail_len;
368 } ap; 377 } ap;
369 struct { 378 struct {
370 struct dentry *channel_use; 379 struct dentry *channel_use;
371 struct dentry *drop_unencrypted; 380 struct dentry *drop_unencrypted;
372 struct dentry *eapol; 381 struct dentry *ieee802_1x_pac;
373 struct dentry *ieee8021_x;
374 struct dentry *peer; 382 struct dentry *peer;
375 } wds; 383 } wds;
376 struct { 384 struct {
377 struct dentry *channel_use; 385 struct dentry *channel_use;
378 struct dentry *drop_unencrypted; 386 struct dentry *drop_unencrypted;
379 struct dentry *eapol; 387 struct dentry *ieee802_1x_pac;
380 struct dentry *ieee8021_x;
381 } vlan; 388 } vlan;
382 struct { 389 struct {
383 struct dentry *mode; 390 struct dentry *mode;
@@ -385,8 +392,16 @@ struct ieee80211_sub_if_data {
385 struct dentry *default_key; 392 struct dentry *default_key;
386 } debugfs; 393 } debugfs;
387#endif 394#endif
395 /* must be last, dynamically sized area in this! */
396 struct ieee80211_vif vif;
388}; 397};
389 398
399static inline
400struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
401{
402 return container_of(p, struct ieee80211_sub_if_data, vif);
403}
404
390#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev) 405#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev)
391 406
392enum { 407enum {
@@ -468,7 +483,8 @@ struct ieee80211_local {
468 483
469 struct list_head interfaces; 484 struct list_head interfaces;
470 485
471 int sta_scanning; 486 bool sta_sw_scanning;
487 bool sta_hw_scanning;
472 int scan_channel_idx; 488 int scan_channel_idx;
473 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; 489 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
474 unsigned long last_scan_completed; 490 unsigned long last_scan_completed;
@@ -481,10 +497,6 @@ struct ieee80211_local {
481 struct list_head sta_bss_list; 497 struct list_head sta_bss_list;
482 struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE]; 498 struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE];
483 spinlock_t sta_bss_lock; 499 spinlock_t sta_bss_lock;
484#define IEEE80211_SCAN_MATCH_SSID BIT(0)
485#define IEEE80211_SCAN_WPA_ONLY BIT(1)
486#define IEEE80211_SCAN_EXTRA_INFO BIT(2)
487 int scan_flags;
488 500
489 /* SNMP counters */ 501 /* SNMP counters */
490 /* dot11CountersTable */ 502 /* dot11CountersTable */
@@ -501,8 +513,9 @@ struct ieee80211_local {
501 513
502#ifdef CONFIG_MAC80211_LEDS 514#ifdef CONFIG_MAC80211_LEDS
503 int tx_led_counter, rx_led_counter; 515 int tx_led_counter, rx_led_counter;
504 struct led_trigger *tx_led, *rx_led, *assoc_led; 516 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
505 char tx_led_name[32], rx_led_name[32], assoc_led_name[32]; 517 char tx_led_name[32], rx_led_name[32],
518 assoc_led_name[32], radio_led_name[32];
506#endif 519#endif
507 520
508 u32 channel_use; 521 u32 channel_use;
@@ -706,6 +719,9 @@ int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr);
706void ieee80211_if_setup(struct net_device *dev); 719void ieee80211_if_setup(struct net_device *dev);
707struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local, 720struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local,
708 int phymode, int hwrate); 721 int phymode, int hwrate);
722int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
723 struct ieee80211_ht_info *req_ht_cap,
724 struct ieee80211_ht_bss_info *req_bss_cap);
709 725
710/* ieee80211_ioctl.c */ 726/* ieee80211_ioctl.c */
711extern const struct iw_handler_def ieee80211_iw_handler_def; 727extern const struct iw_handler_def ieee80211_iw_handler_def;
@@ -747,7 +763,8 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
747void ieee80211_sta_req_auth(struct net_device *dev, 763void ieee80211_sta_req_auth(struct net_device *dev,
748 struct ieee80211_if_sta *ifsta); 764 struct ieee80211_if_sta *ifsta);
749int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len); 765int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len);
750void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, 766ieee80211_txrx_result ieee80211_sta_rx_scan(struct net_device *dev,
767 struct sk_buff *skb,
751 struct ieee80211_rx_status *rx_status); 768 struct ieee80211_rx_status *rx_status);
752void ieee80211_rx_bss_list_init(struct net_device *dev); 769void ieee80211_rx_bss_list_init(struct net_device *dev);
753void ieee80211_rx_bss_list_deinit(struct net_device *dev); 770void ieee80211_rx_bss_list_deinit(struct net_device *dev);
@@ -757,9 +774,17 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
757 u8 *addr); 774 u8 *addr);
758int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); 775int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
759int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); 776int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
760void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes); 777void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
778 u32 changed);
761void ieee80211_reset_erp_info(struct net_device *dev); 779void ieee80211_reset_erp_info(struct net_device *dev);
762 780int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
781 struct ieee80211_ht_info *ht_info);
782int ieee80211_ht_addt_info_ie_to_ht_bss_info(
783 struct ieee80211_ht_addt_info *ht_add_info_ie,
784 struct ieee80211_ht_bss_info *bss_info);
785void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
786 u16 tid, u16 initiator, u16 reason);
787void sta_rx_agg_session_timer_expired(unsigned long data);
763/* ieee80211_iface.c */ 788/* ieee80211_iface.c */
764int ieee80211_if_add(struct net_device *dev, const char *name, 789int ieee80211_if_add(struct net_device *dev, const char *name,
765 struct net_device **new_dev, int type); 790 struct net_device **new_dev, int type);
@@ -791,8 +816,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
791extern void *mac80211_wiphy_privid; /* for wiphy privid */ 816extern void *mac80211_wiphy_privid; /* for wiphy privid */
792extern const unsigned char rfc1042_header[6]; 817extern const unsigned char rfc1042_header[6];
793extern const unsigned char bridge_tunnel_header[6]; 818extern const unsigned char bridge_tunnel_header[6];
794u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len); 819u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
795int ieee80211_is_eapol(const struct sk_buff *skb); 820 enum ieee80211_if_types type);
796int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, 821int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
797 int rate, int erp, int short_preamble); 822 int rate, int erp, int short_preamble);
798void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, 823void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index 43e505d29452..92f1eb2da311 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -22,7 +22,6 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
22 22
23 /* Default values for sub-interface parameters */ 23 /* Default values for sub-interface parameters */
24 sdata->drop_unencrypted = 0; 24 sdata->drop_unencrypted = 0;
25 sdata->eapol = 1;
26 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) 25 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
27 skb_queue_head_init(&sdata->fragments[i].skb_list); 26 skb_queue_head_init(&sdata->fragments[i].skb_list);
28 27
@@ -48,7 +47,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
48 int ret; 47 int ret;
49 48
50 ASSERT_RTNL(); 49 ASSERT_RTNL();
51 ndev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), 50 ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size,
52 name, ieee80211_if_setup); 51 name, ieee80211_if_setup);
53 if (!ndev) 52 if (!ndev)
54 return -ENOMEM; 53 return -ENOMEM;
@@ -67,7 +66,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
67 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 66 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
68 ndev->ieee80211_ptr = &sdata->wdev; 67 ndev->ieee80211_ptr = &sdata->wdev;
69 sdata->wdev.wiphy = local->hw.wiphy; 68 sdata->wdev.wiphy = local->hw.wiphy;
70 sdata->type = IEEE80211_IF_TYPE_AP; 69 sdata->vif.type = IEEE80211_IF_TYPE_AP;
71 sdata->dev = ndev; 70 sdata->dev = ndev;
72 sdata->local = local; 71 sdata->local = local;
73 ieee80211_if_sdata_init(sdata); 72 ieee80211_if_sdata_init(sdata);
@@ -99,7 +98,7 @@ fail:
99void ieee80211_if_set_type(struct net_device *dev, int type) 98void ieee80211_if_set_type(struct net_device *dev, int type)
100{ 99{
101 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 100 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
102 int oldtype = sdata->type; 101 int oldtype = sdata->vif.type;
103 102
104 /* 103 /*
105 * We need to call this function on the master interface 104 * We need to call this function on the master interface
@@ -117,7 +116,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
117 116
118 /* most have no BSS pointer */ 117 /* most have no BSS pointer */
119 sdata->bss = NULL; 118 sdata->bss = NULL;
120 sdata->type = type; 119 sdata->vif.type = type;
121 120
122 switch (type) { 121 switch (type) {
123 case IEEE80211_IF_TYPE_WDS: 122 case IEEE80211_IF_TYPE_WDS:
@@ -127,7 +126,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
127 sdata->u.vlan.ap = NULL; 126 sdata->u.vlan.ap = NULL;
128 break; 127 break;
129 case IEEE80211_IF_TYPE_AP: 128 case IEEE80211_IF_TYPE_AP:
130 sdata->u.ap.dtim_period = 2;
131 sdata->u.ap.force_unicast_rateidx = -1; 129 sdata->u.ap.force_unicast_rateidx = -1;
132 sdata->u.ap.max_ratectrl_rateidx = -1; 130 sdata->u.ap.max_ratectrl_rateidx = -1;
133 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 131 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
@@ -182,7 +180,7 @@ void ieee80211_if_reinit(struct net_device *dev)
182 180
183 ieee80211_if_sdata_deinit(sdata); 181 ieee80211_if_sdata_deinit(sdata);
184 182
185 switch (sdata->type) { 183 switch (sdata->vif.type) {
186 case IEEE80211_IF_TYPE_INVALID: 184 case IEEE80211_IF_TYPE_INVALID:
187 /* cannot happen */ 185 /* cannot happen */
188 WARN_ON(1); 186 WARN_ON(1);
@@ -208,8 +206,7 @@ void ieee80211_if_reinit(struct net_device *dev)
208 } 206 }
209 } 207 }
210 208
211 kfree(sdata->u.ap.beacon_head); 209 kfree(sdata->u.ap.beacon);
212 kfree(sdata->u.ap.beacon_tail);
213 210
214 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { 211 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
215 local->total_ps_buffered--; 212 local->total_ps_buffered--;
@@ -280,7 +277,7 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
280 ASSERT_RTNL(); 277 ASSERT_RTNL();
281 278
282 list_for_each_entry_safe(sdata, n, &local->interfaces, list) { 279 list_for_each_entry_safe(sdata, n, &local->interfaces, list) {
283 if ((sdata->type == id || id == -1) && 280 if ((sdata->vif.type == id || id == -1) &&
284 strcmp(name, sdata->dev->name) == 0 && 281 strcmp(name, sdata->dev->name) == 0 &&
285 sdata->dev != local->mdev) { 282 sdata->dev != local->mdev) {
286 list_del_rcu(&sdata->list); 283 list_del_rcu(&sdata->list);
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 6caa3ec2cff7..5024d3733834 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -21,6 +21,7 @@
21 21
22#include <net/mac80211.h> 22#include <net/mac80211.h>
23#include "ieee80211_i.h" 23#include "ieee80211_i.h"
24#include "ieee80211_led.h"
24#include "ieee80211_rate.h" 25#include "ieee80211_rate.h"
25#include "wpa.h" 26#include "wpa.h"
26#include "aes_ccm.h" 27#include "aes_ccm.h"
@@ -111,8 +112,8 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
111 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) 112 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
112 return -EOPNOTSUPP; 113 return -EOPNOTSUPP;
113 114
114 if (sdata->type == IEEE80211_IF_TYPE_STA || 115 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
115 sdata->type == IEEE80211_IF_TYPE_IBSS) { 116 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
116 int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); 117 int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length);
117 if (ret) 118 if (ret)
118 return ret; 119 return ret;
@@ -218,6 +219,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
218 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); 219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); 220 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
220 221
222 range->scan_capa |= IW_SCAN_CAPA_ESSID;
223
221 return 0; 224 return 0;
222} 225}
223 226
@@ -229,7 +232,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
229 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 232 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
230 int type; 233 int type;
231 234
232 if (sdata->type == IEEE80211_IF_TYPE_VLAN) 235 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
233 return -EOPNOTSUPP; 236 return -EOPNOTSUPP;
234 237
235 switch (*mode) { 238 switch (*mode) {
@@ -246,7 +249,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
246 return -EINVAL; 249 return -EINVAL;
247 } 250 }
248 251
249 if (type == sdata->type) 252 if (type == sdata->vif.type)
250 return 0; 253 return 0;
251 if (netif_running(dev)) 254 if (netif_running(dev))
252 return -EBUSY; 255 return -EBUSY;
@@ -265,7 +268,7 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
265 struct ieee80211_sub_if_data *sdata; 268 struct ieee80211_sub_if_data *sdata;
266 269
267 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 270 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
268 switch (sdata->type) { 271 switch (sdata->vif.type) {
269 case IEEE80211_IF_TYPE_AP: 272 case IEEE80211_IF_TYPE_AP:
270 *mode = IW_MODE_MASTER; 273 *mode = IW_MODE_MASTER;
271 break; 274 break;
@@ -315,7 +318,7 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq)
315 } 318 }
316 319
317 if (set) { 320 if (set) {
318 if (local->sta_scanning) 321 if (local->sta_sw_scanning)
319 ret = 0; 322 ret = 0;
320 else 323 else
321 ret = ieee80211_hw_config(local); 324 ret = ieee80211_hw_config(local);
@@ -333,13 +336,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
333 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 336 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
334 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 337 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
335 338
336 if (sdata->type == IEEE80211_IF_TYPE_STA) 339 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
337 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; 340 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
338 341
339 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ 342 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
340 if (freq->e == 0) { 343 if (freq->e == 0) {
341 if (freq->m < 0) { 344 if (freq->m < 0) {
342 if (sdata->type == IEEE80211_IF_TYPE_STA) 345 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
343 sdata->u.sta.flags |= 346 sdata->u.sta.flags |=
344 IEEE80211_STA_AUTO_CHANNEL_SEL; 347 IEEE80211_STA_AUTO_CHANNEL_SEL;
345 return 0; 348 return 0;
@@ -385,8 +388,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
385 len--; 388 len--;
386 389
387 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 390 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
388 if (sdata->type == IEEE80211_IF_TYPE_STA || 391 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
389 sdata->type == IEEE80211_IF_TYPE_IBSS) { 392 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
390 int ret; 393 int ret;
391 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 394 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
392 if (len > IEEE80211_MAX_SSID_LEN) 395 if (len > IEEE80211_MAX_SSID_LEN)
@@ -406,7 +409,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
406 return 0; 409 return 0;
407 } 410 }
408 411
409 if (sdata->type == IEEE80211_IF_TYPE_AP) { 412 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
410 memcpy(sdata->u.ap.ssid, ssid, len); 413 memcpy(sdata->u.ap.ssid, ssid, len);
411 memset(sdata->u.ap.ssid + len, 0, 414 memset(sdata->u.ap.ssid + len, 0,
412 IEEE80211_MAX_SSID_LEN - len); 415 IEEE80211_MAX_SSID_LEN - len);
@@ -425,8 +428,8 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
425 428
426 struct ieee80211_sub_if_data *sdata; 429 struct ieee80211_sub_if_data *sdata;
427 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 430 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
428 if (sdata->type == IEEE80211_IF_TYPE_STA || 431 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
429 sdata->type == IEEE80211_IF_TYPE_IBSS) { 432 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
430 int res = ieee80211_sta_get_ssid(dev, ssid, &len); 433 int res = ieee80211_sta_get_ssid(dev, ssid, &len);
431 if (res == 0) { 434 if (res == 0) {
432 data->length = len; 435 data->length = len;
@@ -436,7 +439,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
436 return res; 439 return res;
437 } 440 }
438 441
439 if (sdata->type == IEEE80211_IF_TYPE_AP) { 442 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
440 len = sdata->u.ap.ssid_len; 443 len = sdata->u.ap.ssid_len;
441 if (len > IW_ESSID_MAX_SIZE) 444 if (len > IW_ESSID_MAX_SIZE)
442 len = IW_ESSID_MAX_SIZE; 445 len = IW_ESSID_MAX_SIZE;
@@ -456,8 +459,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
456 struct ieee80211_sub_if_data *sdata; 459 struct ieee80211_sub_if_data *sdata;
457 460
458 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 461 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
459 if (sdata->type == IEEE80211_IF_TYPE_STA || 462 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
460 sdata->type == IEEE80211_IF_TYPE_IBSS) { 463 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
461 int ret; 464 int ret;
462 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 465 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
463 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, 466 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data,
@@ -476,7 +479,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
476 return ret; 479 return ret;
477 ieee80211_sta_req_auth(dev, &sdata->u.sta); 480 ieee80211_sta_req_auth(dev, &sdata->u.sta);
478 return 0; 481 return 0;
479 } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { 482 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
480 if (memcmp(sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data, 483 if (memcmp(sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data,
481 ETH_ALEN) == 0) 484 ETH_ALEN) == 0)
482 return 0; 485 return 0;
@@ -494,12 +497,12 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
494 struct ieee80211_sub_if_data *sdata; 497 struct ieee80211_sub_if_data *sdata;
495 498
496 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 499 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
497 if (sdata->type == IEEE80211_IF_TYPE_STA || 500 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
498 sdata->type == IEEE80211_IF_TYPE_IBSS) { 501 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
499 ap_addr->sa_family = ARPHRD_ETHER; 502 ap_addr->sa_family = ARPHRD_ETHER;
500 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 503 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
501 return 0; 504 return 0;
502 } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { 505 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
503 ap_addr->sa_family = ARPHRD_ETHER; 506 ap_addr->sa_family = ARPHRD_ETHER;
504 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 507 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
505 return 0; 508 return 0;
@@ -513,7 +516,6 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
513 struct iw_request_info *info, 516 struct iw_request_info *info,
514 union iwreq_data *wrqu, char *extra) 517 union iwreq_data *wrqu, char *extra)
515{ 518{
516 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
517 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 519 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
518 struct iw_scan_req *req = NULL; 520 struct iw_scan_req *req = NULL;
519 u8 *ssid = NULL; 521 u8 *ssid = NULL;
@@ -522,23 +524,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
522 if (!netif_running(dev)) 524 if (!netif_running(dev))
523 return -ENETDOWN; 525 return -ENETDOWN;
524 526
525 switch (sdata->type) { 527 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
526 case IEEE80211_IF_TYPE_STA: 528 sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
527 case IEEE80211_IF_TYPE_IBSS: 529 sdata->vif.type != IEEE80211_IF_TYPE_AP)
528 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
529 ssid = sdata->u.sta.ssid;
530 ssid_len = sdata->u.sta.ssid_len;
531 }
532 break;
533 case IEEE80211_IF_TYPE_AP:
534 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
535 ssid = sdata->u.ap.ssid;
536 ssid_len = sdata->u.ap.ssid_len;
537 }
538 break;
539 default:
540 return -EOPNOTSUPP; 530 return -EOPNOTSUPP;
541 }
542 531
543 /* if SSID was specified explicitly then use that */ 532 /* if SSID was specified explicitly then use that */
544 if (wrqu->data.length == sizeof(struct iw_scan_req) && 533 if (wrqu->data.length == sizeof(struct iw_scan_req) &&
@@ -558,8 +547,10 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
558{ 547{
559 int res; 548 int res;
560 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 549 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
561 if (local->sta_scanning) 550
551 if (local->sta_sw_scanning || local->sta_hw_scanning)
562 return -EAGAIN; 552 return -EAGAIN;
553
563 res = ieee80211_sta_scan_results(dev, extra, data->length); 554 res = ieee80211_sta_scan_results(dev, extra, data->length);
564 if (res >= 0) { 555 if (res >= 0) {
565 data->length = res; 556 data->length = res;
@@ -591,7 +582,7 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev,
591 sdata->bss->force_unicast_rateidx = -1; 582 sdata->bss->force_unicast_rateidx = -1;
592 if (rate->value < 0) 583 if (rate->value < 0)
593 return 0; 584 return 0;
594 for (i=0; i< mode->num_rates; i++) { 585 for (i=0; i < mode->num_rates; i++) {
595 struct ieee80211_rate *rates = &mode->rates[i]; 586 struct ieee80211_rate *rates = &mode->rates[i];
596 int this_rate = rates->rate; 587 int this_rate = rates->rate;
597 588
@@ -599,10 +590,10 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev,
599 sdata->bss->max_ratectrl_rateidx = i; 590 sdata->bss->max_ratectrl_rateidx = i;
600 if (rate->fixed) 591 if (rate->fixed)
601 sdata->bss->force_unicast_rateidx = i; 592 sdata->bss->force_unicast_rateidx = i;
602 break; 593 return 0;
603 } 594 }
604 } 595 }
605 return 0; 596 return -EINVAL;
606} 597}
607 598
608static int ieee80211_ioctl_giwrate(struct net_device *dev, 599static int ieee80211_ioctl_giwrate(struct net_device *dev,
@@ -614,7 +605,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
614 struct ieee80211_sub_if_data *sdata; 605 struct ieee80211_sub_if_data *sdata;
615 606
616 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 607 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
617 if (sdata->type == IEEE80211_IF_TYPE_STA) 608 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
618 sta = sta_info_get(local, sdata->u.sta.bssid); 609 sta = sta_info_get(local, sdata->u.sta.bssid);
619 else 610 else
620 return -EOPNOTSUPP; 611 return -EOPNOTSUPP;
@@ -634,22 +625,36 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
634{ 625{
635 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 626 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
636 bool need_reconfig = 0; 627 bool need_reconfig = 0;
628 u8 new_power_level;
637 629
638 if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) 630 if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
639 return -EINVAL; 631 return -EINVAL;
640 if (data->txpower.flags & IW_TXPOW_RANGE) 632 if (data->txpower.flags & IW_TXPOW_RANGE)
641 return -EINVAL; 633 return -EINVAL;
642 if (!data->txpower.fixed)
643 return -EINVAL;
644 634
645 if (local->hw.conf.power_level != data->txpower.value) { 635 if (data->txpower.fixed) {
646 local->hw.conf.power_level = data->txpower.value; 636 new_power_level = data->txpower.value;
637 } else {
638 /* Automatic power level. Get the px power from the current
639 * channel. */
640 struct ieee80211_channel* chan = local->oper_channel;
641 if (!chan)
642 return -EINVAL;
643
644 new_power_level = chan->power_level;
645 }
646
647 if (local->hw.conf.power_level != new_power_level) {
648 local->hw.conf.power_level = new_power_level;
647 need_reconfig = 1; 649 need_reconfig = 1;
648 } 650 }
651
649 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { 652 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
650 local->hw.conf.radio_enabled = !(data->txpower.disabled); 653 local->hw.conf.radio_enabled = !(data->txpower.disabled);
651 need_reconfig = 1; 654 need_reconfig = 1;
655 ieee80211_led_radio(local, local->hw.conf.radio_enabled);
652 } 656 }
657
653 if (need_reconfig) { 658 if (need_reconfig) {
654 ieee80211_hw_config(local); 659 ieee80211_hw_config(local);
655 /* The return value of hw_config is not of big interest here, 660 /* The return value of hw_config is not of big interest here,
@@ -814,8 +819,8 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
814 struct iw_mlme *mlme = (struct iw_mlme *) extra; 819 struct iw_mlme *mlme = (struct iw_mlme *) extra;
815 820
816 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 821 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
817 if (sdata->type != IEEE80211_IF_TYPE_STA && 822 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
818 sdata->type != IEEE80211_IF_TYPE_IBSS) 823 sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
819 return -EINVAL; 824 return -EINVAL;
820 825
821 switch (mlme->cmd) { 826 switch (mlme->cmd) {
@@ -917,7 +922,6 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
917 struct iw_request_info *info, 922 struct iw_request_info *info,
918 struct iw_param *data, char *extra) 923 struct iw_param *data, char *extra)
919{ 924{
920 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
921 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 925 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
922 int ret = 0; 926 int ret = 0;
923 927
@@ -927,32 +931,33 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
927 case IW_AUTH_CIPHER_GROUP: 931 case IW_AUTH_CIPHER_GROUP:
928 case IW_AUTH_WPA_ENABLED: 932 case IW_AUTH_WPA_ENABLED:
929 case IW_AUTH_RX_UNENCRYPTED_EAPOL: 933 case IW_AUTH_RX_UNENCRYPTED_EAPOL:
930 break;
931 case IW_AUTH_KEY_MGMT: 934 case IW_AUTH_KEY_MGMT:
932 if (sdata->type != IEEE80211_IF_TYPE_STA) 935 break;
936 case IW_AUTH_DROP_UNENCRYPTED:
937 sdata->drop_unencrypted = !!data->value;
938 break;
939 case IW_AUTH_PRIVACY_INVOKED:
940 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
933 ret = -EINVAL; 941 ret = -EINVAL;
934 else { 942 else {
943 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
935 /* 944 /*
936 * Key management was set by wpa_supplicant, 945 * Privacy invoked by wpa_supplicant, store the
937 * we only need this to associate to a network 946 * value and allow associating to a protected
938 * that has privacy enabled regardless of not 947 * network without having a key up front.
939 * having a key.
940 */ 948 */
941 sdata->u.sta.key_management_enabled = !!data->value; 949 if (data->value)
950 sdata->u.sta.flags |=
951 IEEE80211_STA_PRIVACY_INVOKED;
942 } 952 }
943 break; 953 break;
944 case IW_AUTH_80211_AUTH_ALG: 954 case IW_AUTH_80211_AUTH_ALG:
945 if (sdata->type == IEEE80211_IF_TYPE_STA || 955 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
946 sdata->type == IEEE80211_IF_TYPE_IBSS) 956 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
947 sdata->u.sta.auth_algs = data->value; 957 sdata->u.sta.auth_algs = data->value;
948 else 958 else
949 ret = -EOPNOTSUPP; 959 ret = -EOPNOTSUPP;
950 break; 960 break;
951 case IW_AUTH_PRIVACY_INVOKED:
952 if (local->ops->set_privacy_invoked)
953 ret = local->ops->set_privacy_invoked(
954 local_to_hw(local), data->value);
955 break;
956 default: 961 default:
957 ret = -EOPNOTSUPP; 962 ret = -EOPNOTSUPP;
958 break; 963 break;
@@ -968,8 +973,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
968 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 973 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
969 struct sta_info *sta = NULL; 974 struct sta_info *sta = NULL;
970 975
971 if (sdata->type == IEEE80211_IF_TYPE_STA || 976 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
972 sdata->type == IEEE80211_IF_TYPE_IBSS) 977 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
973 sta = sta_info_get(local, sdata->u.sta.bssid); 978 sta = sta_info_get(local, sdata->u.sta.bssid);
974 if (!sta) { 979 if (!sta) {
975 wstats->discard.fragment = 0; 980 wstats->discard.fragment = 0;
@@ -997,8 +1002,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
997 1002
998 switch (data->flags & IW_AUTH_INDEX) { 1003 switch (data->flags & IW_AUTH_INDEX) {
999 case IW_AUTH_80211_AUTH_ALG: 1004 case IW_AUTH_80211_AUTH_ALG:
1000 if (sdata->type == IEEE80211_IF_TYPE_STA || 1005 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
1001 sdata->type == IEEE80211_IF_TYPE_IBSS) 1006 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
1002 data->value = sdata->u.sta.auth_algs; 1007 data->value = sdata->u.sta.auth_algs;
1003 else 1008 else
1004 ret = -EOPNOTSUPP; 1009 ret = -EOPNOTSUPP;
diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c
index 4cf89af9d100..f401484ab6d7 100644
--- a/net/mac80211/ieee80211_led.c
+++ b/net/mac80211/ieee80211_led.c
@@ -43,6 +43,16 @@ void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
43 led_trigger_event(local->assoc_led, LED_OFF); 43 led_trigger_event(local->assoc_led, LED_OFF);
44} 44}
45 45
46void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
47{
48 if (unlikely(!local->radio_led))
49 return;
50 if (enabled)
51 led_trigger_event(local->radio_led, LED_FULL);
52 else
53 led_trigger_event(local->radio_led, LED_OFF);
54}
55
46void ieee80211_led_init(struct ieee80211_local *local) 56void ieee80211_led_init(struct ieee80211_local *local)
47{ 57{
48 local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); 58 local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
@@ -77,10 +87,25 @@ void ieee80211_led_init(struct ieee80211_local *local)
77 local->assoc_led = NULL; 87 local->assoc_led = NULL;
78 } 88 }
79 } 89 }
90
91 local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
92 if (local->radio_led) {
93 snprintf(local->radio_led_name, sizeof(local->radio_led_name),
94 "%sradio", wiphy_name(local->hw.wiphy));
95 local->radio_led->name = local->radio_led_name;
96 if (led_trigger_register(local->radio_led)) {
97 kfree(local->radio_led);
98 local->radio_led = NULL;
99 }
100 }
80} 101}
81 102
82void ieee80211_led_exit(struct ieee80211_local *local) 103void ieee80211_led_exit(struct ieee80211_local *local)
83{ 104{
105 if (local->radio_led) {
106 led_trigger_unregister(local->radio_led);
107 kfree(local->radio_led);
108 }
84 if (local->assoc_led) { 109 if (local->assoc_led) {
85 led_trigger_unregister(local->assoc_led); 110 led_trigger_unregister(local->assoc_led);
86 kfree(local->assoc_led); 111 kfree(local->assoc_led);
@@ -95,6 +120,16 @@ void ieee80211_led_exit(struct ieee80211_local *local)
95 } 120 }
96} 121}
97 122
123char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
124{
125 struct ieee80211_local *local = hw_to_local(hw);
126
127 if (local->radio_led)
128 return local->radio_led_name;
129 return NULL;
130}
131EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
132
98char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) 133char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
99{ 134{
100 struct ieee80211_local *local = hw_to_local(hw); 135 struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h
index 0feb22619835..77b1e1ba6039 100644
--- a/net/mac80211/ieee80211_led.h
+++ b/net/mac80211/ieee80211_led.h
@@ -16,6 +16,8 @@ extern void ieee80211_led_rx(struct ieee80211_local *local);
16extern void ieee80211_led_tx(struct ieee80211_local *local, int q); 16extern void ieee80211_led_tx(struct ieee80211_local *local, int q);
17extern void ieee80211_led_assoc(struct ieee80211_local *local, 17extern void ieee80211_led_assoc(struct ieee80211_local *local,
18 bool associated); 18 bool associated);
19extern void ieee80211_led_radio(struct ieee80211_local *local,
20 bool enabled);
19extern void ieee80211_led_init(struct ieee80211_local *local); 21extern void ieee80211_led_init(struct ieee80211_local *local);
20extern void ieee80211_led_exit(struct ieee80211_local *local); 22extern void ieee80211_led_exit(struct ieee80211_local *local);
21#else 23#else
@@ -29,6 +31,10 @@ static inline void ieee80211_led_assoc(struct ieee80211_local *local,
29 bool associated) 31 bool associated)
30{ 32{
31} 33}
34static inline void ieee80211_led_radio(struct ieee80211_local *local,
35 bool enabled)
36{
37}
32static inline void ieee80211_led_init(struct ieee80211_local *local) 38static inline void ieee80211_led_init(struct ieee80211_local *local)
33{ 39{
34} 40}
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index 93abb8fff141..b957e67c5fba 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -21,17 +21,35 @@ struct rate_control_alg {
21static LIST_HEAD(rate_ctrl_algs); 21static LIST_HEAD(rate_ctrl_algs);
22static DEFINE_MUTEX(rate_ctrl_mutex); 22static DEFINE_MUTEX(rate_ctrl_mutex);
23 23
24static char *ieee80211_default_rc_algo = CONFIG_MAC80211_RC_DEFAULT;
25module_param(ieee80211_default_rc_algo, charp, 0644);
26MODULE_PARM_DESC(ieee80211_default_rc_algo,
27 "Default rate control algorithm for mac80211 to use");
28
24int ieee80211_rate_control_register(struct rate_control_ops *ops) 29int ieee80211_rate_control_register(struct rate_control_ops *ops)
25{ 30{
26 struct rate_control_alg *alg; 31 struct rate_control_alg *alg;
27 32
33 if (!ops->name)
34 return -EINVAL;
35
36 mutex_lock(&rate_ctrl_mutex);
37 list_for_each_entry(alg, &rate_ctrl_algs, list) {
38 if (!strcmp(alg->ops->name, ops->name)) {
39 /* don't register an algorithm twice */
40 WARN_ON(1);
41 mutex_unlock(&rate_ctrl_mutex);
42 return -EALREADY;
43 }
44 }
45
28 alg = kzalloc(sizeof(*alg), GFP_KERNEL); 46 alg = kzalloc(sizeof(*alg), GFP_KERNEL);
29 if (alg == NULL) { 47 if (alg == NULL) {
48 mutex_unlock(&rate_ctrl_mutex);
30 return -ENOMEM; 49 return -ENOMEM;
31 } 50 }
32 alg->ops = ops; 51 alg->ops = ops;
33 52
34 mutex_lock(&rate_ctrl_mutex);
35 list_add_tail(&alg->list, &rate_ctrl_algs); 53 list_add_tail(&alg->list, &rate_ctrl_algs);
36 mutex_unlock(&rate_ctrl_mutex); 54 mutex_unlock(&rate_ctrl_mutex);
37 55
@@ -47,11 +65,11 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops)
47 list_for_each_entry(alg, &rate_ctrl_algs, list) { 65 list_for_each_entry(alg, &rate_ctrl_algs, list) {
48 if (alg->ops == ops) { 66 if (alg->ops == ops) {
49 list_del(&alg->list); 67 list_del(&alg->list);
68 kfree(alg);
50 break; 69 break;
51 } 70 }
52 } 71 }
53 mutex_unlock(&rate_ctrl_mutex); 72 mutex_unlock(&rate_ctrl_mutex);
54 kfree(alg);
55} 73}
56EXPORT_SYMBOL(ieee80211_rate_control_unregister); 74EXPORT_SYMBOL(ieee80211_rate_control_unregister);
57 75
@@ -61,9 +79,12 @@ ieee80211_try_rate_control_ops_get(const char *name)
61 struct rate_control_alg *alg; 79 struct rate_control_alg *alg;
62 struct rate_control_ops *ops = NULL; 80 struct rate_control_ops *ops = NULL;
63 81
82 if (!name)
83 return NULL;
84
64 mutex_lock(&rate_ctrl_mutex); 85 mutex_lock(&rate_ctrl_mutex);
65 list_for_each_entry(alg, &rate_ctrl_algs, list) { 86 list_for_each_entry(alg, &rate_ctrl_algs, list) {
66 if (!name || !strcmp(alg->ops->name, name)) 87 if (!strcmp(alg->ops->name, name))
67 if (try_module_get(alg->ops->module)) { 88 if (try_module_get(alg->ops->module)) {
68 ops = alg->ops; 89 ops = alg->ops;
69 break; 90 break;
@@ -73,18 +94,31 @@ ieee80211_try_rate_control_ops_get(const char *name)
73 return ops; 94 return ops;
74} 95}
75 96
76/* Get the rate control algorithm. If `name' is NULL, get the first 97/* Get the rate control algorithm. */
77 * available algorithm. */
78static struct rate_control_ops * 98static struct rate_control_ops *
79ieee80211_rate_control_ops_get(const char *name) 99ieee80211_rate_control_ops_get(const char *name)
80{ 100{
81 struct rate_control_ops *ops; 101 struct rate_control_ops *ops;
102 const char *alg_name;
82 103
83 ops = ieee80211_try_rate_control_ops_get(name); 104 if (!name)
105 alg_name = ieee80211_default_rc_algo;
106 else
107 alg_name = name;
108
109 ops = ieee80211_try_rate_control_ops_get(alg_name);
84 if (!ops) { 110 if (!ops) {
85 request_module("rc80211_%s", name ? name : "default"); 111 request_module("rc80211_%s", alg_name);
86 ops = ieee80211_try_rate_control_ops_get(name); 112 ops = ieee80211_try_rate_control_ops_get(alg_name);
87 } 113 }
114 if (!ops && name)
115 /* try default if specific alg requested but not found */
116 ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
117
118 /* try built-in one if specific alg requested but not found */
119 if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
120 ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
121
88 return ops; 122 return ops;
89} 123}
90 124
@@ -128,6 +162,37 @@ static void rate_control_release(struct kref *kref)
128 kfree(ctrl_ref); 162 kfree(ctrl_ref);
129} 163}
130 164
165void rate_control_get_rate(struct net_device *dev,
166 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
167 struct rate_selection *sel)
168{
169 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
170 struct rate_control_ref *ref = local->rate_ctrl;
171 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
172 struct sta_info *sta = sta_info_get(local, hdr->addr1);
173 int i;
174
175 memset(sel, 0, sizeof(struct rate_selection));
176
177 ref->ops->get_rate(ref->priv, dev, mode, skb, sel);
178
179 /* Select a non-ERP backup rate. */
180 if (!sel->nonerp) {
181 for (i = 0; i < mode->num_rates - 1; i++) {
182 struct ieee80211_rate *rate = &mode->rates[i];
183 if (sel->rate->rate < rate->rate)
184 break;
185
186 if (rate_supported(sta, mode, i) &&
187 !(rate->flags & IEEE80211_RATE_ERP))
188 sel->nonerp = rate;
189 }
190 }
191
192 if (sta)
193 sta_info_put(sta);
194}
195
131struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) 196struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
132{ 197{
133 kref_get(&ref->kref); 198 kref_get(&ref->kref);
@@ -178,3 +243,4 @@ void rate_control_deinitialize(struct ieee80211_local *local)
178 local->rate_ctrl = NULL; 243 local->rate_ctrl = NULL;
179 rate_control_put(ref); 244 rate_control_put(ref);
180} 245}
246
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 7cd1ebab4f83..73f19e8aa51c 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -18,31 +18,24 @@
18#include "ieee80211_i.h" 18#include "ieee80211_i.h"
19#include "sta_info.h" 19#include "sta_info.h"
20 20
21#define RATE_CONTROL_NUM_DOWN 20 21struct rate_selection {
22#define RATE_CONTROL_NUM_UP 15 22 /* Selected transmission rate */
23 23 struct ieee80211_rate *rate;
24 24 /* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */
25struct rate_control_extra {
26 /* values from rate_control_get_rate() to the caller: */
27 struct ieee80211_rate *probe; /* probe with this rate, or NULL for no
28 * probing */
29 struct ieee80211_rate *nonerp; 25 struct ieee80211_rate *nonerp;
30 26 /* probe with this rate, or NULL for no probing */
31 /* parameters from the caller to rate_control_get_rate(): */ 27 struct ieee80211_rate *probe;
32 struct ieee80211_hw_mode *mode;
33 u16 ethertype;
34}; 28};
35 29
36
37struct rate_control_ops { 30struct rate_control_ops {
38 struct module *module; 31 struct module *module;
39 const char *name; 32 const char *name;
40 void (*tx_status)(void *priv, struct net_device *dev, 33 void (*tx_status)(void *priv, struct net_device *dev,
41 struct sk_buff *skb, 34 struct sk_buff *skb,
42 struct ieee80211_tx_status *status); 35 struct ieee80211_tx_status *status);
43 struct ieee80211_rate *(*get_rate)(void *priv, struct net_device *dev, 36 void (*get_rate)(void *priv, struct net_device *dev,
44 struct sk_buff *skb, 37 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
45 struct rate_control_extra *extra); 38 struct rate_selection *sel);
46 void (*rate_init)(void *priv, void *priv_sta, 39 void (*rate_init)(void *priv, void *priv_sta,
47 struct ieee80211_local *local, struct sta_info *sta); 40 struct ieee80211_local *local, struct sta_info *sta);
48 void (*clear)(void *priv); 41 void (*clear)(void *priv);
@@ -72,25 +65,20 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
72 * first available algorithm. */ 65 * first available algorithm. */
73struct rate_control_ref *rate_control_alloc(const char *name, 66struct rate_control_ref *rate_control_alloc(const char *name,
74 struct ieee80211_local *local); 67 struct ieee80211_local *local);
68void rate_control_get_rate(struct net_device *dev,
69 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
70 struct rate_selection *sel);
75struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); 71struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
76void rate_control_put(struct rate_control_ref *ref); 72void rate_control_put(struct rate_control_ref *ref);
77 73
78static inline void rate_control_tx_status(struct ieee80211_local *local, 74static inline void rate_control_tx_status(struct net_device *dev,
79 struct net_device *dev,
80 struct sk_buff *skb, 75 struct sk_buff *skb,
81 struct ieee80211_tx_status *status) 76 struct ieee80211_tx_status *status)
82{ 77{
78 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
83 struct rate_control_ref *ref = local->rate_ctrl; 79 struct rate_control_ref *ref = local->rate_ctrl;
84 ref->ops->tx_status(ref->priv, dev, skb, status);
85}
86
87 80
88static inline struct ieee80211_rate * 81 ref->ops->tx_status(ref->priv, dev, skb, status);
89rate_control_get_rate(struct ieee80211_local *local, struct net_device *dev,
90 struct sk_buff *skb, struct rate_control_extra *extra)
91{
92 struct rate_control_ref *ref = local->rate_ctrl;
93 return ref->ops->get_rate(ref->priv, dev, skb, extra);
94} 82}
95 83
96 84
@@ -139,10 +127,73 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
139#endif 127#endif
140} 128}
141 129
130static inline int
131rate_supported(struct sta_info *sta, struct ieee80211_hw_mode *mode, int index)
132{
133 return (sta == NULL || sta->supp_rates & BIT(index)) &&
134 (mode->rates[index].flags & IEEE80211_RATE_SUPPORTED);
135}
136
137static inline int
138rate_lowest_index(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
139 struct sta_info *sta)
140{
141 int i;
142
143 for (i = 0; i < mode->num_rates; i++) {
144 if (rate_supported(sta, mode, i))
145 return i;
146 }
147
148 /* warn when we cannot find a rate. */
149 WARN_ON(1);
150
151 return 0;
152}
153
154static inline struct ieee80211_rate *
155rate_lowest(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
156 struct sta_info *sta)
157{
158 return &mode->rates[rate_lowest_index(local, mode, sta)];
159}
160
142 161
143/* functions for rate control related to a device */ 162/* functions for rate control related to a device */
144int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, 163int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
145 const char *name); 164 const char *name);
146void rate_control_deinitialize(struct ieee80211_local *local); 165void rate_control_deinitialize(struct ieee80211_local *local);
147 166
167
168/* Rate control algorithms */
169#if defined(RC80211_SIMPLE_COMPILE) || \
170 (defined(CONFIG_MAC80211_RC_SIMPLE) && \
171 !defined(CONFIG_MAC80211_RC_SIMPLE_MODULE))
172extern int rc80211_simple_init(void);
173extern void rc80211_simple_exit(void);
174#else
175static inline int rc80211_simple_init(void)
176{
177 return 0;
178}
179static inline void rc80211_simple_exit(void)
180{
181}
182#endif
183
184#if defined(RC80211_PID_COMPILE) || \
185 (defined(CONFIG_MAC80211_RC_PID) && \
186 !defined(CONFIG_MAC80211_RC_PID_MODULE))
187extern int rc80211_pid_init(void);
188extern void rc80211_pid_exit(void);
189#else
190static inline int rc80211_pid_init(void)
191{
192 return 0;
193}
194static inline void rc80211_pid_exit(void)
195{
196}
197#endif
198
148#endif /* IEEE80211_RATE_H */ 199#endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index fda0e06453e8..2019b4f0528d 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -57,6 +57,20 @@
57 57
58#define ERP_INFO_USE_PROTECTION BIT(1) 58#define ERP_INFO_USE_PROTECTION BIT(1)
59 59
60/* mgmt header + 1 byte action code */
61#define IEEE80211_MIN_ACTION_SIZE (24 + 1)
62
63#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
64#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
65#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
66#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
67#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
68
69/* next values represent the buffer size for A-MPDU frame.
70 * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */
71#define IEEE80211_MIN_AMPDU_BUF 0x8
72#define IEEE80211_MAX_AMPDU_BUF 0x40
73
60static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, 74static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
61 u8 *ssid, size_t ssid_len); 75 u8 *ssid, size_t ssid_len);
62static struct ieee80211_sta_bss * 76static struct ieee80211_sta_bss *
@@ -90,7 +104,8 @@ struct ieee802_11_elems {
90 u8 *ext_supp_rates; 104 u8 *ext_supp_rates;
91 u8 *wmm_info; 105 u8 *wmm_info;
92 u8 *wmm_param; 106 u8 *wmm_param;
93 107 u8 *ht_cap_elem;
108 u8 *ht_info_elem;
94 /* length of them, respectively */ 109 /* length of them, respectively */
95 u8 ssid_len; 110 u8 ssid_len;
96 u8 supp_rates_len; 111 u8 supp_rates_len;
@@ -106,6 +121,8 @@ struct ieee802_11_elems {
106 u8 ext_supp_rates_len; 121 u8 ext_supp_rates_len;
107 u8 wmm_info_len; 122 u8 wmm_info_len;
108 u8 wmm_param_len; 123 u8 wmm_param_len;
124 u8 ht_cap_elem_len;
125 u8 ht_info_elem_len;
109}; 126};
110 127
111static void ieee802_11_parse_elems(u8 *start, size_t len, 128static void ieee802_11_parse_elems(u8 *start, size_t len,
@@ -190,6 +207,14 @@ static void ieee802_11_parse_elems(u8 *start, size_t len,
190 elems->ext_supp_rates = pos; 207 elems->ext_supp_rates = pos;
191 elems->ext_supp_rates_len = elen; 208 elems->ext_supp_rates_len = elen;
192 break; 209 break;
210 case WLAN_EID_HT_CAPABILITY:
211 elems->ht_cap_elem = pos;
212 elems->ht_cap_elem_len = elen;
213 break;
214 case WLAN_EID_HT_EXTRA_INFO:
215 elems->ht_info_elem = pos;
216 elems->ht_info_elem_len = elen;
217 break;
193 default: 218 default:
194 break; 219 break;
195 } 220 }
@@ -288,50 +313,89 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
288} 313}
289 314
290 315
291static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) 316static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata,
317 u8 erp_value)
292{ 318{
293 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 319 struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
294 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 320 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
295 int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; 321 bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
296 int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0; 322 bool preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0;
297 u8 changes = 0;
298 DECLARE_MAC_BUF(mac); 323 DECLARE_MAC_BUF(mac);
324 u32 changed = 0;
299 325
300 if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) { 326 if (use_protection != bss_conf->use_cts_prot) {
301 if (net_ratelimit()) { 327 if (net_ratelimit()) {
302 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" 328 printk(KERN_DEBUG "%s: CTS protection %s (BSSID="
303 "%s)\n", 329 "%s)\n",
304 dev->name, 330 sdata->dev->name,
305 use_protection ? "enabled" : "disabled", 331 use_protection ? "enabled" : "disabled",
306 print_mac(mac, ifsta->bssid)); 332 print_mac(mac, ifsta->bssid));
307 } 333 }
308 if (use_protection) 334 bss_conf->use_cts_prot = use_protection;
309 sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; 335 changed |= BSS_CHANGED_ERP_CTS_PROT;
310 else
311 sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION;
312 changes |= IEEE80211_ERP_CHANGE_PROTECTION;
313 } 336 }
314 337
315 if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) { 338 if (preamble_mode != bss_conf->use_short_preamble) {
316 if (net_ratelimit()) { 339 if (net_ratelimit()) {
317 printk(KERN_DEBUG "%s: switched to %s barker preamble" 340 printk(KERN_DEBUG "%s: switched to %s barker preamble"
318 " (BSSID=%s)\n", 341 " (BSSID=%s)\n",
319 dev->name, 342 sdata->dev->name,
320 (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ? 343 (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ?
321 "short" : "long", 344 "short" : "long",
322 print_mac(mac, ifsta->bssid)); 345 print_mac(mac, ifsta->bssid));
323 } 346 }
324 if (preamble_mode) 347 bss_conf->use_short_preamble = preamble_mode;
325 sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; 348 changed |= BSS_CHANGED_ERP_PREAMBLE;
326 else
327 sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE;
328 changes |= IEEE80211_ERP_CHANGE_PREAMBLE;
329 } 349 }
330 350
331 if (changes) 351 return changed;
332 ieee80211_erp_info_change_notify(dev, changes); 352}
353
354int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
355 struct ieee80211_ht_info *ht_info)
356{
357
358 if (ht_info == NULL)
359 return -EINVAL;
360
361 memset(ht_info, 0, sizeof(*ht_info));
362
363 if (ht_cap_ie) {
364 u8 ampdu_info = ht_cap_ie->ampdu_params_info;
365
366 ht_info->ht_supported = 1;
367 ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
368 ht_info->ampdu_factor =
369 ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
370 ht_info->ampdu_density =
371 (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
372 memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
373 } else
374 ht_info->ht_supported = 0;
375
376 return 0;
333} 377}
334 378
379int ieee80211_ht_addt_info_ie_to_ht_bss_info(
380 struct ieee80211_ht_addt_info *ht_add_info_ie,
381 struct ieee80211_ht_bss_info *bss_info)
382{
383 if (bss_info == NULL)
384 return -EINVAL;
385
386 memset(bss_info, 0, sizeof(*bss_info));
387
388 if (ht_add_info_ie) {
389 u16 op_mode;
390 op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
391
392 bss_info->primary_channel = ht_add_info_ie->control_chan;
393 bss_info->bss_cap = ht_add_info_ie->ht_param;
394 bss_info->bss_op_mode = (u8)(op_mode & 0xff);
395 }
396
397 return 0;
398}
335 399
336static void ieee80211_sta_send_associnfo(struct net_device *dev, 400static void ieee80211_sta_send_associnfo(struct net_device *dev,
337 struct ieee80211_if_sta *ifsta) 401 struct ieee80211_if_sta *ifsta)
@@ -388,20 +452,17 @@ static void ieee80211_set_associated(struct net_device *dev,
388 struct ieee80211_if_sta *ifsta, 452 struct ieee80211_if_sta *ifsta,
389 bool assoc) 453 bool assoc)
390{ 454{
391 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 455 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
456 struct ieee80211_local *local = sdata->local;
392 union iwreq_data wrqu; 457 union iwreq_data wrqu;
393 458 u32 changed = BSS_CHANGED_ASSOC;
394 if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc)
395 return;
396 459
397 if (assoc) { 460 if (assoc) {
398 struct ieee80211_sub_if_data *sdata;
399 struct ieee80211_sta_bss *bss; 461 struct ieee80211_sta_bss *bss;
400 462
401 ifsta->flags |= IEEE80211_STA_ASSOCIATED; 463 ifsta->flags |= IEEE80211_STA_ASSOCIATED;
402 464
403 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 465 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
404 if (sdata->type != IEEE80211_IF_TYPE_STA)
405 return; 466 return;
406 467
407 bss = ieee80211_rx_bss_get(dev, ifsta->bssid, 468 bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
@@ -409,7 +470,8 @@ static void ieee80211_set_associated(struct net_device *dev,
409 ifsta->ssid, ifsta->ssid_len); 470 ifsta->ssid, ifsta->ssid_len);
410 if (bss) { 471 if (bss) {
411 if (bss->has_erp_value) 472 if (bss->has_erp_value)
412 ieee80211_handle_erp_ie(dev, bss->erp_value); 473 changed |= ieee80211_handle_erp_ie(
474 sdata, bss->erp_value);
413 ieee80211_rx_bss_put(dev, bss); 475 ieee80211_rx_bss_put(dev, bss);
414 } 476 }
415 477
@@ -429,6 +491,8 @@ static void ieee80211_set_associated(struct net_device *dev,
429 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); 491 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
430 ifsta->last_probe = jiffies; 492 ifsta->last_probe = jiffies;
431 ieee80211_led_assoc(local, assoc); 493 ieee80211_led_assoc(local, assoc);
494
495 ieee80211_bss_info_change_notify(sdata, changed);
432} 496}
433 497
434static void ieee80211_set_disassoc(struct net_device *dev, 498static void ieee80211_set_disassoc(struct net_device *dev,
@@ -630,6 +694,19 @@ static void ieee80211_send_assoc(struct net_device *dev,
630 *pos++ = 1; /* WME ver */ 694 *pos++ = 1; /* WME ver */
631 *pos++ = 0; 695 *pos++ = 0;
632 } 696 }
697 /* wmm support is a must to HT */
698 if (wmm && mode->ht_info.ht_supported) {
699 __le16 tmp = cpu_to_le16(mode->ht_info.cap);
700 pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
701 *pos++ = WLAN_EID_HT_CAPABILITY;
702 *pos++ = sizeof(struct ieee80211_ht_cap);
703 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
704 memcpy(pos, &tmp, sizeof(u16));
705 pos += sizeof(u16);
706 *pos++ = (mode->ht_info.ampdu_factor |
707 (mode->ht_info.ampdu_density << 2));
708 memcpy(pos, mode->ht_info.supp_mcs_set, 16);
709 }
633 710
634 kfree(ifsta->assocreq_ies); 711 kfree(ifsta->assocreq_ies);
635 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; 712 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
@@ -704,10 +781,11 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
704{ 781{
705 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 782 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
706 struct ieee80211_sta_bss *bss; 783 struct ieee80211_sta_bss *bss;
707 int res = 0; 784 int bss_privacy;
785 int wep_privacy;
786 int privacy_invoked;
708 787
709 if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL) || 788 if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL))
710 ifsta->key_management_enabled)
711 return 0; 789 return 0;
712 790
713 bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, 791 bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel,
@@ -715,13 +793,16 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
715 if (!bss) 793 if (!bss)
716 return 0; 794 return 0;
717 795
718 if (ieee80211_sta_wep_configured(dev) != 796 bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY);
719 !!(bss->capability & WLAN_CAPABILITY_PRIVACY)) 797 wep_privacy = !!ieee80211_sta_wep_configured(dev);
720 res = 1; 798 privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED);
721 799
722 ieee80211_rx_bss_put(dev, bss); 800 ieee80211_rx_bss_put(dev, bss);
723 801
724 return res; 802 if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked))
803 return 0;
804
805 return 1;
725} 806}
726 807
727 808
@@ -804,12 +885,8 @@ static void ieee80211_associated(struct net_device *dev,
804 sta_info_put(sta); 885 sta_info_put(sta);
805 } 886 }
806 if (disassoc) { 887 if (disassoc) {
807 union iwreq_data wrqu; 888 ifsta->state = IEEE80211_DISABLED;
808 memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); 889 ieee80211_set_associated(dev, ifsta, 0);
809 wrqu.ap_addr.sa_family = ARPHRD_ETHER;
810 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
811 mod_timer(&ifsta->timer, jiffies +
812 IEEE80211_MONITORING_INTERVAL + 30 * HZ);
813 } else { 890 } else {
814 mod_timer(&ifsta->timer, jiffies + 891 mod_timer(&ifsta->timer, jiffies +
815 IEEE80211_MONITORING_INTERVAL); 892 IEEE80211_MONITORING_INTERVAL);
@@ -918,6 +995,320 @@ static void ieee80211_auth_challenge(struct net_device *dev,
918 elems.challenge_len + 2, 1); 995 elems.challenge_len + 2, 1);
919} 996}
920 997
998static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
999 u8 dialog_token, u16 status, u16 policy,
1000 u16 buf_size, u16 timeout)
1001{
1002 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1003 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
1004 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1005 struct sk_buff *skb;
1006 struct ieee80211_mgmt *mgmt;
1007 u16 capab;
1008
1009 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
1010 sizeof(mgmt->u.action.u.addba_resp));
1011 if (!skb) {
1012 printk(KERN_DEBUG "%s: failed to allocate buffer "
1013 "for addba resp frame\n", dev->name);
1014 return;
1015 }
1016
1017 skb_reserve(skb, local->hw.extra_tx_headroom);
1018 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
1019 memset(mgmt, 0, 24);
1020 memcpy(mgmt->da, da, ETH_ALEN);
1021 memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
1022 if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
1023 memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
1024 else
1025 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1026 mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
1027 IEEE80211_STYPE_ACTION);
1028
1029 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
1030 mgmt->u.action.category = WLAN_CATEGORY_BACK;
1031 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
1032 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
1033
1034 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
1035 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
1036 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
1037
1038 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
1039 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
1040 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
1041
1042 ieee80211_sta_tx(dev, skb, 0);
1043
1044 return;
1045}
1046
1047static void ieee80211_sta_process_addba_request(struct net_device *dev,
1048 struct ieee80211_mgmt *mgmt,
1049 size_t len)
1050{
1051 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1052 struct ieee80211_hw *hw = &local->hw;
1053 struct ieee80211_conf *conf = &hw->conf;
1054 struct sta_info *sta;
1055 struct tid_ampdu_rx *tid_agg_rx;
1056 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
1057 u8 dialog_token;
1058 int ret = -EOPNOTSUPP;
1059 DECLARE_MAC_BUF(mac);
1060
1061 sta = sta_info_get(local, mgmt->sa);
1062 if (!sta)
1063 return;
1064
1065 /* extract session parameters from addba request frame */
1066 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
1067 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
1068 start_seq_num =
1069 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
1070
1071 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
1072 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
1073 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
1074 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
1075
1076 status = WLAN_STATUS_REQUEST_DECLINED;
1077
1078 /* sanity check for incoming parameters:
1079 * check if configuration can support the BA policy
1080 * and if buffer size does not exceeds max value */
1081 if (((ba_policy != 1)
1082 && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
1083 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
1084 status = WLAN_STATUS_INVALID_QOS_PARAM;
1085#ifdef CONFIG_MAC80211_HT_DEBUG
1086 if (net_ratelimit())
1087 printk(KERN_DEBUG "Block Ack Req with bad params from "
1088 "%s on tid %u. policy %d, buffer size %d\n",
1089 print_mac(mac, mgmt->sa), tid, ba_policy,
1090 buf_size);
1091#endif /* CONFIG_MAC80211_HT_DEBUG */
1092 goto end_no_lock;
1093 }
1094 /* determine default buffer size */
1095 if (buf_size == 0) {
1096 struct ieee80211_hw_mode *mode = conf->mode;
1097 buf_size = IEEE80211_MIN_AMPDU_BUF;
1098 buf_size = buf_size << mode->ht_info.ampdu_factor;
1099 }
1100
1101 tid_agg_rx = &sta->ampdu_mlme.tid_rx[tid];
1102
1103 /* examine state machine */
1104 spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
1105
1106 if (tid_agg_rx->state != HT_AGG_STATE_IDLE) {
1107#ifdef CONFIG_MAC80211_HT_DEBUG
1108 if (net_ratelimit())
1109 printk(KERN_DEBUG "unexpected Block Ack Req from "
1110 "%s on tid %u\n",
1111 print_mac(mac, mgmt->sa), tid);
1112#endif /* CONFIG_MAC80211_HT_DEBUG */
1113 goto end;
1114 }
1115
1116 /* prepare reordering buffer */
1117 tid_agg_rx->reorder_buf =
1118 kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC);
1119 if ((!tid_agg_rx->reorder_buf) && net_ratelimit()) {
1120 printk(KERN_ERR "can not allocate reordering buffer "
1121 "to tid %d\n", tid);
1122 goto end;
1123 }
1124 memset(tid_agg_rx->reorder_buf, 0,
1125 buf_size * sizeof(struct sk_buf *));
1126
1127 if (local->ops->ampdu_action)
1128 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
1129 sta->addr, tid, start_seq_num);
1130#ifdef CONFIG_MAC80211_HT_DEBUG
1131 printk(KERN_DEBUG "Rx A-MPDU on tid %d result %d", tid, ret);
1132#endif /* CONFIG_MAC80211_HT_DEBUG */
1133
1134 if (ret) {
1135 kfree(tid_agg_rx->reorder_buf);
1136 goto end;
1137 }
1138
1139 /* change state and send addba resp */
1140 tid_agg_rx->state = HT_AGG_STATE_OPERATIONAL;
1141 tid_agg_rx->dialog_token = dialog_token;
1142 tid_agg_rx->ssn = start_seq_num;
1143 tid_agg_rx->head_seq_num = start_seq_num;
1144 tid_agg_rx->buf_size = buf_size;
1145 tid_agg_rx->timeout = timeout;
1146 tid_agg_rx->stored_mpdu_num = 0;
1147 status = WLAN_STATUS_SUCCESS;
1148end:
1149 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1150
1151end_no_lock:
1152 ieee80211_send_addba_resp(sta->dev, sta->addr, tid, dialog_token,
1153 status, 1, buf_size, timeout);
1154 sta_info_put(sta);
1155}
1156
1157static void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
1158 u16 initiator, u16 reason_code)
1159{
1160 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1161 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1162 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
1163 struct sk_buff *skb;
1164 struct ieee80211_mgmt *mgmt;
1165 u16 params;
1166
1167 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
1168 sizeof(mgmt->u.action.u.delba));
1169
1170 if (!skb) {
1171 printk(KERN_ERR "%s: failed to allocate buffer "
1172 "for delba frame\n", dev->name);
1173 return;
1174 }
1175
1176 skb_reserve(skb, local->hw.extra_tx_headroom);
1177 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
1178 memset(mgmt, 0, 24);
1179 memcpy(mgmt->da, da, ETH_ALEN);
1180 memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
1181 if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
1182 memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
1183 else
1184 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1185 mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
1186 IEEE80211_STYPE_ACTION);
1187
1188 skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
1189
1190 mgmt->u.action.category = WLAN_CATEGORY_BACK;
1191 mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
1192 params = (u16)(initiator << 11); /* bit 11 initiator */
1193 params |= (u16)(tid << 12); /* bit 15:12 TID number */
1194
1195 mgmt->u.action.u.delba.params = cpu_to_le16(params);
1196 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
1197
1198 ieee80211_sta_tx(dev, skb, 0);
1199}
1200
1201void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
1202 u16 initiator, u16 reason)
1203{
1204 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1205 struct ieee80211_hw *hw = &local->hw;
1206 struct sta_info *sta;
1207 int ret, i;
1208
1209 sta = sta_info_get(local, ra);
1210 if (!sta)
1211 return;
1212
1213 /* check if TID is in operational state */
1214 spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
1215 if (sta->ampdu_mlme.tid_rx[tid].state
1216 != HT_AGG_STATE_OPERATIONAL) {
1217 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1218 sta_info_put(sta);
1219 return;
1220 }
1221 sta->ampdu_mlme.tid_rx[tid].state =
1222 HT_AGG_STATE_REQ_STOP_BA_MSK |
1223 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
1224 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1225
1226 /* stop HW Rx aggregation. ampdu_action existence
1227 * already verified in session init so we add the BUG_ON */
1228 BUG_ON(!local->ops->ampdu_action);
1229
1230 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
1231 ra, tid, EINVAL);
1232 if (ret)
1233 printk(KERN_DEBUG "HW problem - can not stop rx "
1234 "aggergation for tid %d\n", tid);
1235
1236 /* shutdown timer has not expired */
1237 if (initiator != WLAN_BACK_TIMER)
1238 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid].
1239 session_timer);
1240
1241 /* check if this is a self generated aggregation halt */
1242 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
1243 ieee80211_send_delba(dev, ra, tid, 0, reason);
1244
1245 /* free the reordering buffer */
1246 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid].buf_size; i++) {
1247 if (sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]) {
1248 /* release the reordered frames */
1249 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]);
1250 sta->ampdu_mlme.tid_rx[tid].stored_mpdu_num--;
1251 sta->ampdu_mlme.tid_rx[tid].reorder_buf[i] = NULL;
1252 }
1253 }
1254 kfree(sta->ampdu_mlme.tid_rx[tid].reorder_buf);
1255
1256 sta->ampdu_mlme.tid_rx[tid].state = HT_AGG_STATE_IDLE;
1257 sta_info_put(sta);
1258}
1259
1260static void ieee80211_sta_process_delba(struct net_device *dev,
1261 struct ieee80211_mgmt *mgmt, size_t len)
1262{
1263 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1264 struct sta_info *sta;
1265 u16 tid, params;
1266 u16 initiator;
1267 DECLARE_MAC_BUF(mac);
1268
1269 sta = sta_info_get(local, mgmt->sa);
1270 if (!sta)
1271 return;
1272
1273 params = le16_to_cpu(mgmt->u.action.u.delba.params);
1274 tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
1275 initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
1276
1277#ifdef CONFIG_MAC80211_HT_DEBUG
1278 if (net_ratelimit())
1279 printk(KERN_DEBUG "delba from %s on tid %d reason code %d\n",
1280 print_mac(mac, mgmt->sa), tid,
1281 mgmt->u.action.u.delba.reason_code);
1282#endif /* CONFIG_MAC80211_HT_DEBUG */
1283
1284 if (initiator == WLAN_BACK_INITIATOR)
1285 ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid,
1286 WLAN_BACK_INITIATOR, 0);
1287 sta_info_put(sta);
1288}
1289
1290/*
1291 * After receiving Block Ack Request (BAR) we activated a
1292 * timer after each frame arrives from the originator.
1293 * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
1294 */
1295void sta_rx_agg_session_timer_expired(unsigned long data)
1296{
1297 /* not an elegant detour, but there is no choice as the timer passes
1298 * only one argument, and verious sta_info are needed here, so init
1299 * flow in sta_info_add gives the TID as data, while the timer_to_id
1300 * array gives the sta through container_of */
1301 u8 *ptid = (u8 *)data;
1302 u8 *timer_to_id = ptid - *ptid;
1303 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
1304 timer_to_tid[0]);
1305
1306 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
1307 ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr, (u16)*ptid,
1308 WLAN_BACK_TIMER,
1309 WLAN_REASON_QSTA_TIMEOUT);
1310}
1311
921 1312
922static void ieee80211_rx_mgmt_auth(struct net_device *dev, 1313static void ieee80211_rx_mgmt_auth(struct net_device *dev,
923 struct ieee80211_if_sta *ifsta, 1314 struct ieee80211_if_sta *ifsta,
@@ -929,7 +1320,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
929 DECLARE_MAC_BUF(mac); 1320 DECLARE_MAC_BUF(mac);
930 1321
931 if (ifsta->state != IEEE80211_AUTHENTICATE && 1322 if (ifsta->state != IEEE80211_AUTHENTICATE &&
932 sdata->type != IEEE80211_IF_TYPE_IBSS) { 1323 sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
933 printk(KERN_DEBUG "%s: authentication frame received from " 1324 printk(KERN_DEBUG "%s: authentication frame received from "
934 "%s, but not in authenticate state - ignored\n", 1325 "%s, but not in authenticate state - ignored\n",
935 dev->name, print_mac(mac, mgmt->sa)); 1326 dev->name, print_mac(mac, mgmt->sa));
@@ -943,7 +1334,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
943 return; 1334 return;
944 } 1335 }
945 1336
946 if (sdata->type != IEEE80211_IF_TYPE_IBSS && 1337 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
947 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { 1338 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
948 printk(KERN_DEBUG "%s: authentication frame received from " 1339 printk(KERN_DEBUG "%s: authentication frame received from "
949 "unknown AP (SA=%s BSSID=%s) - " 1340 "unknown AP (SA=%s BSSID=%s) - "
@@ -952,7 +1343,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
952 return; 1343 return;
953 } 1344 }
954 1345
955 if (sdata->type != IEEE80211_IF_TYPE_IBSS && 1346 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
956 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { 1347 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) {
957 printk(KERN_DEBUG "%s: authentication frame received from " 1348 printk(KERN_DEBUG "%s: authentication frame received from "
958 "unknown BSSID (SA=%s BSSID=%s) - " 1349 "unknown BSSID (SA=%s BSSID=%s) - "
@@ -970,7 +1361,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
970 dev->name, print_mac(mac, mgmt->sa), auth_alg, 1361 dev->name, print_mac(mac, mgmt->sa), auth_alg,
971 auth_transaction, status_code); 1362 auth_transaction, status_code);
972 1363
973 if (sdata->type == IEEE80211_IF_TYPE_IBSS) { 1364 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
974 /* IEEE 802.11 standard does not require authentication in IBSS 1365 /* IEEE 802.11 standard does not require authentication in IBSS
975 * networks and most implementations do not seem to use it. 1366 * networks and most implementations do not seem to use it.
976 * However, try to reply to authentication attempts if someone 1367 * However, try to reply to authentication attempts if someone
@@ -1136,18 +1527,20 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
1136} 1527}
1137 1528
1138 1529
1139static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, 1530static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1140 struct ieee80211_if_sta *ifsta, 1531 struct ieee80211_if_sta *ifsta,
1141 struct ieee80211_mgmt *mgmt, 1532 struct ieee80211_mgmt *mgmt,
1142 size_t len, 1533 size_t len,
1143 int reassoc) 1534 int reassoc)
1144{ 1535{
1145 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1536 struct ieee80211_local *local = sdata->local;
1537 struct net_device *dev = sdata->dev;
1146 struct ieee80211_hw_mode *mode; 1538 struct ieee80211_hw_mode *mode;
1147 struct sta_info *sta; 1539 struct sta_info *sta;
1148 u32 rates; 1540 u32 rates;
1149 u16 capab_info, status_code, aid; 1541 u16 capab_info, status_code, aid;
1150 struct ieee802_11_elems elems; 1542 struct ieee802_11_elems elems;
1543 struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
1151 u8 *pos; 1544 u8 *pos;
1152 int i, j; 1545 int i, j;
1153 DECLARE_MAC_BUF(mac); 1546 DECLARE_MAC_BUF(mac);
@@ -1210,20 +1603,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1210 return; 1603 return;
1211 } 1604 }
1212 1605
1213 /* it probably doesn't, but if the frame includes an ERP value then
1214 * update our stored copy */
1215 if (elems.erp_info && elems.erp_info_len >= 1) {
1216 struct ieee80211_sta_bss *bss
1217 = ieee80211_rx_bss_get(dev, ifsta->bssid,
1218 local->hw.conf.channel,
1219 ifsta->ssid, ifsta->ssid_len);
1220 if (bss) {
1221 bss->erp_value = elems.erp_info[0];
1222 bss->has_erp_value = 1;
1223 ieee80211_rx_bss_put(dev, bss);
1224 }
1225 }
1226
1227 printk(KERN_DEBUG "%s: associated\n", dev->name); 1606 printk(KERN_DEBUG "%s: associated\n", dev->name);
1228 ifsta->aid = aid; 1607 ifsta->aid = aid;
1229 ifsta->ap_capab = capab_info; 1608 ifsta->ap_capab = capab_info;
@@ -1234,6 +1613,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1234 if (ifsta->assocresp_ies) 1613 if (ifsta->assocresp_ies)
1235 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); 1614 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len);
1236 1615
1616 /* set AID, ieee80211_set_associated() will tell the driver */
1617 bss_conf->aid = aid;
1237 ieee80211_set_associated(dev, ifsta, 1); 1618 ieee80211_set_associated(dev, ifsta, 1);
1238 1619
1239 /* Add STA entry for the AP */ 1620 /* Add STA entry for the AP */
@@ -1276,6 +1657,19 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1276 } 1657 }
1277 sta->supp_rates = rates; 1658 sta->supp_rates = rates;
1278 1659
1660 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
1661 local->ops->conf_ht) {
1662 struct ieee80211_ht_bss_info bss_info;
1663
1664 ieee80211_ht_cap_ie_to_ht_info(
1665 (struct ieee80211_ht_cap *)
1666 elems.ht_cap_elem, &sta->ht_info);
1667 ieee80211_ht_addt_info_ie_to_ht_bss_info(
1668 (struct ieee80211_ht_addt_info *)
1669 elems.ht_info_elem, &bss_info);
1670 ieee80211_hw_config_ht(local, 1, &sta->ht_info, &bss_info);
1671 }
1672
1279 rate_control_rate_init(sta, local); 1673 rate_control_rate_init(sta, local);
1280 1674
1281 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 1675 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
@@ -1380,6 +1774,7 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
1380 kfree(bss->wpa_ie); 1774 kfree(bss->wpa_ie);
1381 kfree(bss->rsn_ie); 1775 kfree(bss->rsn_ie);
1382 kfree(bss->wmm_ie); 1776 kfree(bss->wmm_ie);
1777 kfree(bss->ht_ie);
1383 kfree(bss); 1778 kfree(bss);
1384} 1779}
1385 1780
@@ -1449,7 +1844,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1449 1844
1450 timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); 1845 timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
1451 1846
1452 if (sdata->type == IEEE80211_IF_TYPE_IBSS && beacon && 1847 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
1453 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { 1848 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
1454#ifdef CONFIG_MAC80211_IBSS_DEBUG 1849#ifdef CONFIG_MAC80211_IBSS_DEBUG
1455 static unsigned long last_tsf_debug = 0; 1850 static unsigned long last_tsf_debug = 0;
@@ -1474,7 +1869,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1474 1869
1475 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); 1870 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1476 1871
1477 if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && 1872 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
1478 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && 1873 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
1479 (sta = sta_info_get(local, mgmt->sa))) { 1874 (sta = sta_info_get(local, mgmt->sa))) {
1480 struct ieee80211_hw_mode *mode; 1875 struct ieee80211_hw_mode *mode;
@@ -1483,8 +1878,18 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1483 u32 supp_rates, prev_rates; 1878 u32 supp_rates, prev_rates;
1484 int i, j; 1879 int i, j;
1485 1880
1486 mode = local->sta_scanning ? 1881 mode = local->sta_sw_scanning ?
1487 local->scan_hw_mode : local->oper_hw_mode; 1882 local->scan_hw_mode : local->oper_hw_mode;
1883
1884 if (local->sta_hw_scanning) {
1885 /* search for the correct mode matches the beacon */
1886 list_for_each_entry(mode, &local->modes_list, list)
1887 if (mode->mode == rx_status->phymode)
1888 break;
1889
1890 if (mode == NULL)
1891 mode = local->oper_hw_mode;
1892 }
1488 rates = mode->rates; 1893 rates = mode->rates;
1489 num_rates = mode->num_rates; 1894 num_rates = mode->num_rates;
1490 1895
@@ -1627,7 +2032,22 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1627 bss->wmm_ie = NULL; 2032 bss->wmm_ie = NULL;
1628 bss->wmm_ie_len = 0; 2033 bss->wmm_ie_len = 0;
1629 } 2034 }
1630 2035 if (elems.ht_cap_elem &&
2036 (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len ||
2037 memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) {
2038 kfree(bss->ht_ie);
2039 bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC);
2040 if (bss->ht_ie) {
2041 memcpy(bss->ht_ie, elems.ht_cap_elem - 2,
2042 elems.ht_cap_elem_len + 2);
2043 bss->ht_ie_len = elems.ht_cap_elem_len + 2;
2044 } else
2045 bss->ht_ie_len = 0;
2046 } else if (!elems.ht_cap_elem && bss->ht_ie) {
2047 kfree(bss->ht_ie);
2048 bss->ht_ie = NULL;
2049 bss->ht_ie_len = 0;
2050 }
1631 2051
1632 bss->hw_mode = rx_status->phymode; 2052 bss->hw_mode = rx_status->phymode;
1633 bss->freq = rx_status->freq; 2053 bss->freq = rx_status->freq;
@@ -1672,11 +2092,14 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
1672 struct ieee80211_if_sta *ifsta; 2092 struct ieee80211_if_sta *ifsta;
1673 size_t baselen; 2093 size_t baselen;
1674 struct ieee802_11_elems elems; 2094 struct ieee802_11_elems elems;
2095 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2096 struct ieee80211_conf *conf = &local->hw.conf;
2097 u32 changed = 0;
1675 2098
1676 ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1); 2099 ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
1677 2100
1678 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2101 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1679 if (sdata->type != IEEE80211_IF_TYPE_STA) 2102 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
1680 return; 2103 return;
1681 ifsta = &sdata->u.sta; 2104 ifsta = &sdata->u.sta;
1682 2105
@@ -1692,12 +2115,31 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
1692 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); 2115 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1693 2116
1694 if (elems.erp_info && elems.erp_info_len >= 1) 2117 if (elems.erp_info && elems.erp_info_len >= 1)
1695 ieee80211_handle_erp_ie(dev, elems.erp_info[0]); 2118 changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]);
2119
2120 if (elems.ht_cap_elem && elems.ht_info_elem &&
2121 elems.wmm_param && local->ops->conf_ht &&
2122 conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
2123 struct ieee80211_ht_bss_info bss_info;
2124
2125 ieee80211_ht_addt_info_ie_to_ht_bss_info(
2126 (struct ieee80211_ht_addt_info *)
2127 elems.ht_info_elem, &bss_info);
2128 /* check if AP changed bss inforamation */
2129 if ((conf->ht_bss_conf.primary_channel !=
2130 bss_info.primary_channel) ||
2131 (conf->ht_bss_conf.bss_cap != bss_info.bss_cap) ||
2132 (conf->ht_bss_conf.bss_op_mode != bss_info.bss_op_mode))
2133 ieee80211_hw_config_ht(local, 1, &conf->ht_conf,
2134 &bss_info);
2135 }
1696 2136
1697 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 2137 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
1698 ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, 2138 ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
1699 elems.wmm_param_len); 2139 elems.wmm_param_len);
1700 } 2140 }
2141
2142 ieee80211_bss_info_change_notify(sdata, changed);
1701} 2143}
1702 2144
1703 2145
@@ -1719,7 +2161,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
1719 DECLARE_MAC_BUF(mac3); 2161 DECLARE_MAC_BUF(mac3);
1720#endif 2162#endif
1721 2163
1722 if (sdata->type != IEEE80211_IF_TYPE_IBSS || 2164 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS ||
1723 ifsta->state != IEEE80211_IBSS_JOINED || 2165 ifsta->state != IEEE80211_IBSS_JOINED ||
1724 len < 24 + 2 || !ifsta->probe_resp) 2166 len < 24 + 2 || !ifsta->probe_resp)
1725 return; 2167 return;
@@ -1775,6 +2217,40 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
1775 ieee80211_sta_tx(dev, skb, 0); 2217 ieee80211_sta_tx(dev, skb, 0);
1776} 2218}
1777 2219
2220static void ieee80211_rx_mgmt_action(struct net_device *dev,
2221 struct ieee80211_if_sta *ifsta,
2222 struct ieee80211_mgmt *mgmt,
2223 size_t len)
2224{
2225 if (len < IEEE80211_MIN_ACTION_SIZE)
2226 return;
2227
2228 switch (mgmt->u.action.category) {
2229 case WLAN_CATEGORY_BACK:
2230 switch (mgmt->u.action.u.addba_req.action_code) {
2231 case WLAN_ACTION_ADDBA_REQ:
2232 if (len < (IEEE80211_MIN_ACTION_SIZE +
2233 sizeof(mgmt->u.action.u.addba_req)))
2234 break;
2235 ieee80211_sta_process_addba_request(dev, mgmt, len);
2236 break;
2237 case WLAN_ACTION_DELBA:
2238 if (len < (IEEE80211_MIN_ACTION_SIZE +
2239 sizeof(mgmt->u.action.u.delba)))
2240 break;
2241 ieee80211_sta_process_delba(dev, mgmt, len);
2242 break;
2243 default:
2244 if (net_ratelimit())
2245 printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n",
2246 dev->name);
2247 break;
2248 }
2249 break;
2250 default:
2251 break;
2252 }
2253}
1778 2254
1779void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, 2255void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
1780 struct ieee80211_rx_status *rx_status) 2256 struct ieee80211_rx_status *rx_status)
@@ -1804,6 +2280,7 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
1804 case IEEE80211_STYPE_REASSOC_RESP: 2280 case IEEE80211_STYPE_REASSOC_RESP:
1805 case IEEE80211_STYPE_DEAUTH: 2281 case IEEE80211_STYPE_DEAUTH:
1806 case IEEE80211_STYPE_DISASSOC: 2282 case IEEE80211_STYPE_DISASSOC:
2283 case IEEE80211_STYPE_ACTION:
1807 skb_queue_tail(&ifsta->skb_queue, skb); 2284 skb_queue_tail(&ifsta->skb_queue, skb);
1808 queue_work(local->hw.workqueue, &ifsta->work); 2285 queue_work(local->hw.workqueue, &ifsta->work);
1809 return; 2286 return;
@@ -1850,10 +2327,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
1850 ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len); 2327 ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len);
1851 break; 2328 break;
1852 case IEEE80211_STYPE_ASSOC_RESP: 2329 case IEEE80211_STYPE_ASSOC_RESP:
1853 ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 0); 2330 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0);
1854 break; 2331 break;
1855 case IEEE80211_STYPE_REASSOC_RESP: 2332 case IEEE80211_STYPE_REASSOC_RESP:
1856 ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 1); 2333 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1);
1857 break; 2334 break;
1858 case IEEE80211_STYPE_DEAUTH: 2335 case IEEE80211_STYPE_DEAUTH:
1859 ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len); 2336 ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len);
@@ -1861,37 +2338,48 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
1861 case IEEE80211_STYPE_DISASSOC: 2338 case IEEE80211_STYPE_DISASSOC:
1862 ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len); 2339 ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len);
1863 break; 2340 break;
2341 case IEEE80211_STYPE_ACTION:
2342 ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len);
2343 break;
1864 } 2344 }
1865 2345
1866 kfree_skb(skb); 2346 kfree_skb(skb);
1867} 2347}
1868 2348
1869 2349
1870void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, 2350ieee80211_txrx_result
1871 struct ieee80211_rx_status *rx_status) 2351ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
2352 struct ieee80211_rx_status *rx_status)
1872{ 2353{
1873 struct ieee80211_mgmt *mgmt; 2354 struct ieee80211_mgmt *mgmt;
1874 u16 fc; 2355 u16 fc;
1875 2356
1876 if (skb->len < 24) { 2357 if (skb->len < 2)
1877 dev_kfree_skb(skb); 2358 return TXRX_DROP;
1878 return;
1879 }
1880 2359
1881 mgmt = (struct ieee80211_mgmt *) skb->data; 2360 mgmt = (struct ieee80211_mgmt *) skb->data;
1882 fc = le16_to_cpu(mgmt->frame_control); 2361 fc = le16_to_cpu(mgmt->frame_control);
1883 2362
2363 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL)
2364 return TXRX_CONTINUE;
2365
2366 if (skb->len < 24)
2367 return TXRX_DROP;
2368
1884 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) { 2369 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) {
1885 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) { 2370 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) {
1886 ieee80211_rx_mgmt_probe_resp(dev, mgmt, 2371 ieee80211_rx_mgmt_probe_resp(dev, mgmt,
1887 skb->len, rx_status); 2372 skb->len, rx_status);
2373 dev_kfree_skb(skb);
2374 return TXRX_QUEUED;
1888 } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) { 2375 } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
1889 ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, 2376 ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len,
1890 rx_status); 2377 rx_status);
2378 dev_kfree_skb(skb);
2379 return TXRX_QUEUED;
1891 } 2380 }
1892 } 2381 }
1893 2382 return TXRX_CONTINUE;
1894 dev_kfree_skb(skb);
1895} 2383}
1896 2384
1897 2385
@@ -1981,13 +2469,13 @@ void ieee80211_sta_work(struct work_struct *work)
1981 if (!netif_running(dev)) 2469 if (!netif_running(dev))
1982 return; 2470 return;
1983 2471
1984 if (local->sta_scanning) 2472 if (local->sta_sw_scanning || local->sta_hw_scanning)
1985 return; 2473 return;
1986 2474
1987 if (sdata->type != IEEE80211_IF_TYPE_STA && 2475 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
1988 sdata->type != IEEE80211_IF_TYPE_IBSS) { 2476 sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
1989 printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface " 2477 printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface "
1990 "(type=%d)\n", dev->name, sdata->type); 2478 "(type=%d)\n", dev->name, sdata->vif.type);
1991 return; 2479 return;
1992 } 2480 }
1993 ifsta = &sdata->u.sta; 2481 ifsta = &sdata->u.sta;
@@ -1998,7 +2486,10 @@ void ieee80211_sta_work(struct work_struct *work)
1998 if (ifsta->state != IEEE80211_AUTHENTICATE && 2486 if (ifsta->state != IEEE80211_AUTHENTICATE &&
1999 ifsta->state != IEEE80211_ASSOCIATE && 2487 ifsta->state != IEEE80211_ASSOCIATE &&
2000 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { 2488 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
2001 ieee80211_sta_start_scan(dev, NULL, 0); 2489 if (ifsta->scan_ssid_len)
2490 ieee80211_sta_start_scan(dev, ifsta->scan_ssid, ifsta->scan_ssid_len);
2491 else
2492 ieee80211_sta_start_scan(dev, NULL, 0);
2002 return; 2493 return;
2003 } 2494 }
2004 2495
@@ -2079,7 +2570,7 @@ void ieee80211_sta_req_auth(struct net_device *dev,
2079 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 2570 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2080 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2571 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2081 2572
2082 if (sdata->type != IEEE80211_IF_TYPE_STA) 2573 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
2083 return; 2574 return;
2084 2575
2085 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | 2576 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
@@ -2201,9 +2692,8 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2201 struct sk_buff *skb; 2692 struct sk_buff *skb;
2202 struct ieee80211_mgmt *mgmt; 2693 struct ieee80211_mgmt *mgmt;
2203 struct ieee80211_tx_control control; 2694 struct ieee80211_tx_control control;
2204 struct ieee80211_rate *rate;
2205 struct ieee80211_hw_mode *mode; 2695 struct ieee80211_hw_mode *mode;
2206 struct rate_control_extra extra; 2696 struct rate_selection ratesel;
2207 u8 *pos; 2697 u8 *pos;
2208 struct ieee80211_sub_if_data *sdata; 2698 struct ieee80211_sub_if_data *sdata;
2209 2699
@@ -2288,18 +2778,17 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2288 } 2778 }
2289 2779
2290 memset(&control, 0, sizeof(control)); 2780 memset(&control, 0, sizeof(control));
2291 memset(&extra, 0, sizeof(extra)); 2781 rate_control_get_rate(dev, local->oper_hw_mode, skb, &ratesel);
2292 extra.mode = local->oper_hw_mode; 2782 if (!ratesel.rate) {
2293 rate = rate_control_get_rate(local, dev, skb, &extra);
2294 if (!rate) {
2295 printk(KERN_DEBUG "%s: Failed to determine TX rate " 2783 printk(KERN_DEBUG "%s: Failed to determine TX rate "
2296 "for IBSS beacon\n", dev->name); 2784 "for IBSS beacon\n", dev->name);
2297 break; 2785 break;
2298 } 2786 }
2787 control.vif = &sdata->vif;
2299 control.tx_rate = 2788 control.tx_rate =
2300 ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 2789 (sdata->bss_conf.use_short_preamble &&
2301 (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 2790 (ratesel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
2302 rate->val2 : rate->val; 2791 ratesel.rate->val2 : ratesel.rate->val;
2303 control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; 2792 control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
2304 control.power_level = local->hw.conf.power_level; 2793 control.power_level = local->hw.conf.power_level;
2305 control.flags |= IEEE80211_TXCTL_NO_ACK; 2794 control.flags |= IEEE80211_TXCTL_NO_ACK;
@@ -2549,7 +3038,7 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
2549 ifsta->flags |= IEEE80211_STA_SSID_SET; 3038 ifsta->flags |= IEEE80211_STA_SSID_SET;
2550 else 3039 else
2551 ifsta->flags &= ~IEEE80211_STA_SSID_SET; 3040 ifsta->flags &= ~IEEE80211_STA_SSID_SET;
2552 if (sdata->type == IEEE80211_IF_TYPE_IBSS && 3041 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
2553 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { 3042 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
2554 ifsta->ibss_join_req = jiffies; 3043 ifsta->ibss_join_req = jiffies;
2555 ifsta->state = IEEE80211_IBSS_SEARCH; 3044 ifsta->state = IEEE80211_IBSS_SEARCH;
@@ -2636,11 +3125,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2636 union iwreq_data wrqu; 3125 union iwreq_data wrqu;
2637 3126
2638 local->last_scan_completed = jiffies; 3127 local->last_scan_completed = jiffies;
2639 wmb(); 3128 memset(&wrqu, 0, sizeof(wrqu));
2640 local->sta_scanning = 0; 3129 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
3130
3131 if (local->sta_hw_scanning) {
3132 local->sta_hw_scanning = 0;
3133 goto done;
3134 }
2641 3135
3136 local->sta_sw_scanning = 0;
2642 if (ieee80211_hw_config(local)) 3137 if (ieee80211_hw_config(local))
2643 printk(KERN_DEBUG "%s: failed to restore operational" 3138 printk(KERN_DEBUG "%s: failed to restore operational "
2644 "channel after scan\n", dev->name); 3139 "channel after scan\n", dev->name);
2645 3140
2646 3141
@@ -2654,9 +3149,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2654 3149
2655 netif_tx_unlock_bh(local->mdev); 3150 netif_tx_unlock_bh(local->mdev);
2656 3151
2657 memset(&wrqu, 0, sizeof(wrqu));
2658 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
2659
2660 rcu_read_lock(); 3152 rcu_read_lock();
2661 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 3153 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2662 3154
@@ -2664,7 +3156,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2664 if (sdata->dev == local->mdev) 3156 if (sdata->dev == local->mdev)
2665 continue; 3157 continue;
2666 3158
2667 if (sdata->type == IEEE80211_IF_TYPE_STA) { 3159 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
2668 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) 3160 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)
2669 ieee80211_send_nullfunc(local, sdata, 0); 3161 ieee80211_send_nullfunc(local, sdata, 0);
2670 ieee80211_sta_timer((unsigned long)sdata); 3162 ieee80211_sta_timer((unsigned long)sdata);
@@ -2674,8 +3166,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2674 } 3166 }
2675 rcu_read_unlock(); 3167 rcu_read_unlock();
2676 3168
3169done:
2677 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 3170 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2678 if (sdata->type == IEEE80211_IF_TYPE_IBSS) { 3171 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
2679 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 3172 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2680 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || 3173 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
2681 (!ifsta->state == IEEE80211_IBSS_JOINED && 3174 (!ifsta->state == IEEE80211_IBSS_JOINED &&
@@ -2696,7 +3189,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2696 int skip; 3189 int skip;
2697 unsigned long next_delay = 0; 3190 unsigned long next_delay = 0;
2698 3191
2699 if (!local->sta_scanning) 3192 if (!local->sta_sw_scanning)
2700 return; 3193 return;
2701 3194
2702 switch (local->scan_state) { 3195 switch (local->scan_state) {
@@ -2710,7 +3203,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2710 skip = !(local->enabled_modes & (1 << mode->mode)); 3203 skip = !(local->enabled_modes & (1 << mode->mode));
2711 chan = &mode->channels[local->scan_channel_idx]; 3204 chan = &mode->channels[local->scan_channel_idx];
2712 if (!(chan->flag & IEEE80211_CHAN_W_SCAN) || 3205 if (!(chan->flag & IEEE80211_CHAN_W_SCAN) ||
2713 (sdata->type == IEEE80211_IF_TYPE_IBSS && 3206 (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
2714 !(chan->flag & IEEE80211_CHAN_W_IBSS)) || 3207 !(chan->flag & IEEE80211_CHAN_W_IBSS)) ||
2715 (local->hw_modes & local->enabled_modes & 3208 (local->hw_modes & local->enabled_modes &
2716 (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) 3209 (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B))
@@ -2759,7 +3252,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2759 break; 3252 break;
2760 } 3253 }
2761 3254
2762 if (local->sta_scanning) 3255 if (local->sta_sw_scanning)
2763 queue_delayed_work(local->hw.workqueue, &local->scan_work, 3256 queue_delayed_work(local->hw.workqueue, &local->scan_work,
2764 next_delay); 3257 next_delay);
2765} 3258}
@@ -2791,7 +3284,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2791 * ResultCode: SUCCESS, INVALID_PARAMETERS 3284 * ResultCode: SUCCESS, INVALID_PARAMETERS
2792 */ 3285 */
2793 3286
2794 if (local->sta_scanning) { 3287 if (local->sta_sw_scanning || local->sta_hw_scanning) {
2795 if (local->scan_dev == dev) 3288 if (local->scan_dev == dev)
2796 return 0; 3289 return 0;
2797 return -EBUSY; 3290 return -EBUSY;
@@ -2799,15 +3292,15 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2799 3292
2800 if (local->ops->hw_scan) { 3293 if (local->ops->hw_scan) {
2801 int rc = local->ops->hw_scan(local_to_hw(local), 3294 int rc = local->ops->hw_scan(local_to_hw(local),
2802 ssid, ssid_len); 3295 ssid, ssid_len);
2803 if (!rc) { 3296 if (!rc) {
2804 local->sta_scanning = 1; 3297 local->sta_hw_scanning = 1;
2805 local->scan_dev = dev; 3298 local->scan_dev = dev;
2806 } 3299 }
2807 return rc; 3300 return rc;
2808 } 3301 }
2809 3302
2810 local->sta_scanning = 1; 3303 local->sta_sw_scanning = 1;
2811 3304
2812 rcu_read_lock(); 3305 rcu_read_lock();
2813 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 3306 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -2818,7 +3311,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2818 continue; 3311 continue;
2819 3312
2820 netif_stop_queue(sdata->dev); 3313 netif_stop_queue(sdata->dev);
2821 if (sdata->type == IEEE80211_IF_TYPE_STA && 3314 if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
2822 (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) 3315 (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED))
2823 ieee80211_send_nullfunc(local, sdata, 1); 3316 ieee80211_send_nullfunc(local, sdata, 1);
2824 } 3317 }
@@ -2859,15 +3352,18 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
2859 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 3352 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2860 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 3353 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2861 3354
2862 if (sdata->type != IEEE80211_IF_TYPE_STA) 3355 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
2863 return ieee80211_sta_start_scan(dev, ssid, ssid_len); 3356 return ieee80211_sta_start_scan(dev, ssid, ssid_len);
2864 3357
2865 if (local->sta_scanning) { 3358 if (local->sta_sw_scanning || local->sta_hw_scanning) {
2866 if (local->scan_dev == dev) 3359 if (local->scan_dev == dev)
2867 return 0; 3360 return 0;
2868 return -EBUSY; 3361 return -EBUSY;
2869 } 3362 }
2870 3363
3364 ifsta->scan_ssid_len = ssid_len;
3365 if (ssid_len)
3366 memcpy(ifsta->scan_ssid, ssid, ssid_len);
2871 set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); 3367 set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
2872 queue_work(local->hw.workqueue, &ifsta->work); 3368 queue_work(local->hw.workqueue, &ifsta->work);
2873 return 0; 3369 return 0;
@@ -2888,15 +3384,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
2888 if (!(local->enabled_modes & (1 << bss->hw_mode))) 3384 if (!(local->enabled_modes & (1 << bss->hw_mode)))
2889 return current_ev; 3385 return current_ev;
2890 3386
2891 if (local->scan_flags & IEEE80211_SCAN_WPA_ONLY &&
2892 !bss->wpa_ie && !bss->rsn_ie)
2893 return current_ev;
2894
2895 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID &&
2896 (local->scan_ssid_len != bss->ssid_len ||
2897 memcmp(local->scan_ssid, bss->ssid, bss->ssid_len) != 0))
2898 return current_ev;
2899
2900 memset(&iwe, 0, sizeof(iwe)); 3387 memset(&iwe, 0, sizeof(iwe));
2901 iwe.cmd = SIOCGIWAP; 3388 iwe.cmd = SIOCGIWAP;
2902 iwe.u.ap_addr.sa_family = ARPHRD_ETHER; 3389 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
@@ -3000,34 +3487,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
3000 } 3487 }
3001 } 3488 }
3002 3489
3003 do {
3004 char *buf;
3005
3006 if (!(local->scan_flags & IEEE80211_SCAN_EXTRA_INFO))
3007 break;
3008
3009 buf = kmalloc(100, GFP_ATOMIC);
3010 if (!buf)
3011 break;
3012
3013 memset(&iwe, 0, sizeof(iwe));
3014 iwe.cmd = IWEVCUSTOM;
3015 sprintf(buf, "bcn_int=%d", bss->beacon_int);
3016 iwe.u.data.length = strlen(buf);
3017 current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
3018 buf);
3019
3020 memset(&iwe, 0, sizeof(iwe));
3021 iwe.cmd = IWEVCUSTOM;
3022 sprintf(buf, "capab=0x%04x", bss->capability);
3023 iwe.u.data.length = strlen(buf);
3024 current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
3025 buf);
3026
3027 kfree(buf);
3028 break;
3029 } while (0);
3030
3031 return current_ev; 3490 return current_ev;
3032} 3491}
3033 3492
@@ -3116,8 +3575,8 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
3116 printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n", 3575 printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n",
3117 dev->name, reason); 3576 dev->name, reason);
3118 3577
3119 if (sdata->type != IEEE80211_IF_TYPE_STA && 3578 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
3120 sdata->type != IEEE80211_IF_TYPE_IBSS) 3579 sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
3121 return -EINVAL; 3580 return -EINVAL;
3122 3581
3123 ieee80211_send_deauth(dev, ifsta, reason); 3582 ieee80211_send_deauth(dev, ifsta, reason);
@@ -3134,7 +3593,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
3134 printk(KERN_DEBUG "%s: disassociate(reason=%d)\n", 3593 printk(KERN_DEBUG "%s: disassociate(reason=%d)\n",
3135 dev->name, reason); 3594 dev->name, reason);
3136 3595
3137 if (sdata->type != IEEE80211_IF_TYPE_STA) 3596 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
3138 return -EINVAL; 3597 return -EINVAL;
3139 3598
3140 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) 3599 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0b2328f7d67c..ed57fb8e82fc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,8 +49,8 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key)
49 * address to indicate a transmit-only key. 49 * address to indicate a transmit-only key.
50 */ 50 */
51 if (key->conf.alg != ALG_WEP && 51 if (key->conf.alg != ALG_WEP &&
52 (key->sdata->type == IEEE80211_IF_TYPE_AP || 52 (key->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
53 key->sdata->type == IEEE80211_IF_TYPE_VLAN)) 53 key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN))
54 addr = zero_addr; 54 addr = zero_addr;
55 55
56 if (key->sta) 56 if (key->sta)
@@ -172,7 +172,7 @@ struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata,
172 if (sta->flags & WLAN_STA_WME) 172 if (sta->flags & WLAN_STA_WME)
173 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; 173 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
174 } else { 174 } else {
175 if (sdata->type == IEEE80211_IF_TYPE_STA) { 175 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
176 struct sta_info *ap; 176 struct sta_info *ap;
177 177
178 /* same here, the AP could be using QoS */ 178 /* same here, the AP could be using QoS */
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
new file mode 100644
index 000000000000..04afc13ed825
--- /dev/null
+++ b/net/mac80211/rc80211_pid.h
@@ -0,0 +1,285 @@
1/*
2 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
3 * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef RC80211_PID_H
11#define RC80211_PID_H
12
13/* Sampling period for measuring percentage of failed frames in ms. */
14#define RC_PID_INTERVAL 125
15
16/* Exponential averaging smoothness (used for I part of PID controller) */
17#define RC_PID_SMOOTHING_SHIFT 3
18#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
19
20/* Sharpening factor (used for D part of PID controller) */
21#define RC_PID_SHARPENING_FACTOR 0
22#define RC_PID_SHARPENING_DURATION 0
23
24/* Fixed point arithmetic shifting amount. */
25#define RC_PID_ARITH_SHIFT 8
26
27/* Fixed point arithmetic factor. */
28#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
29
30/* Proportional PID component coefficient. */
31#define RC_PID_COEFF_P 15
32/* Integral PID component coefficient. */
33#define RC_PID_COEFF_I 9
34/* Derivative PID component coefficient. */
35#define RC_PID_COEFF_D 15
36
37/* Target failed frames rate for the PID controller. NB: This effectively gives
38 * maximum failed frames percentage we're willing to accept. If the wireless
39 * link quality is good, the controller will fail to adjust failed frames
40 * percentage to the target. This is intentional.
41 */
42#define RC_PID_TARGET_PF 14
43
44/* Rate behaviour normalization quantity over time. */
45#define RC_PID_NORM_OFFSET 3
46
47/* Push high rates right after loading. */
48#define RC_PID_FAST_START 0
49
50/* Arithmetic right shift for positive and negative values for ISO C. */
51#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
52 (x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)
53
54enum rc_pid_event_type {
55 RC_PID_EVENT_TYPE_TX_STATUS,
56 RC_PID_EVENT_TYPE_RATE_CHANGE,
57 RC_PID_EVENT_TYPE_TX_RATE,
58 RC_PID_EVENT_TYPE_PF_SAMPLE,
59};
60
61union rc_pid_event_data {
62 /* RC_PID_EVENT_TX_STATUS */
63 struct {
64 struct ieee80211_tx_status tx_status;
65 };
66 /* RC_PID_EVENT_TYPE_RATE_CHANGE */
67 /* RC_PID_EVENT_TYPE_TX_RATE */
68 struct {
69 int index;
70 int rate;
71 };
72 /* RC_PID_EVENT_TYPE_PF_SAMPLE */
73 struct {
74 s32 pf_sample;
75 s32 prop_err;
76 s32 int_err;
77 s32 der_err;
78 };
79};
80
81struct rc_pid_event {
82 /* The time when the event occured */
83 unsigned long timestamp;
84
85 /* Event ID number */
86 unsigned int id;
87
88 /* Type of event */
89 enum rc_pid_event_type type;
90
91 /* type specific data */
92 union rc_pid_event_data data;
93};
94
95/* Size of the event ring buffer. */
96#define RC_PID_EVENT_RING_SIZE 32
97
98struct rc_pid_event_buffer {
99 /* Counter that generates event IDs */
100 unsigned int ev_count;
101
102 /* Ring buffer of events */
103 struct rc_pid_event ring[RC_PID_EVENT_RING_SIZE];
104
105 /* Index to the entry in events_buf to be reused */
106 unsigned int next_entry;
107
108 /* Lock that guards against concurrent access to this buffer struct */
109 spinlock_t lock;
110
111 /* Wait queue for poll/select and blocking I/O */
112 wait_queue_head_t waitqueue;
113};
114
115struct rc_pid_events_file_info {
116 /* The event buffer we read */
117 struct rc_pid_event_buffer *events;
118
119 /* The entry we have should read next */
120 unsigned int next_entry;
121};
122
123/**
124 * struct rc_pid_debugfs_entries - tunable parameters
125 *
126 * Algorithm parameters, tunable via debugfs.
127 * @dir: the debugfs directory for a specific phy
128 * @target: target percentage for failed frames
129 * @sampling_period: error sampling interval in milliseconds
130 * @coeff_p: absolute value of the proportional coefficient
131 * @coeff_i: absolute value of the integral coefficient
132 * @coeff_d: absolute value of the derivative coefficient
133 * @smoothing_shift: absolute value of the integral smoothing factor (i.e.
134 * amount of smoothing introduced by the exponential moving average)
135 * @sharpen_factor: absolute value of the derivative sharpening factor (i.e.
136 * amount of emphasis given to the derivative term after low activity
137 * events)
138 * @sharpen_duration: duration of the sharpening effect after the detected low
139 * activity event, relative to sampling_period
140 * @norm_offset: amount of normalization periodically performed on the learnt
141 * rate behaviour values (lower means we should trust more what we learnt
142 * about behaviour of rates, higher means we should trust more the natural
143 * ordering of rates)
144 * @fast_start: if Y, push high rates right after initialization
145 */
146struct rc_pid_debugfs_entries {
147 struct dentry *dir;
148 struct dentry *target;
149 struct dentry *sampling_period;
150 struct dentry *coeff_p;
151 struct dentry *coeff_i;
152 struct dentry *coeff_d;
153 struct dentry *smoothing_shift;
154 struct dentry *sharpen_factor;
155 struct dentry *sharpen_duration;
156 struct dentry *norm_offset;
157 struct dentry *fast_start;
158};
159
160void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
161 struct ieee80211_tx_status *stat);
162
163void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
164 int index, int rate);
165
166void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
167 int index, int rate);
168
169void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
170 s32 pf_sample, s32 prop_err,
171 s32 int_err, s32 der_err);
172
173void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
174 struct dentry *dir);
175
176void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta);
177
178struct rc_pid_sta_info {
179 unsigned long last_change;
180 unsigned long last_sample;
181
182 u32 tx_num_failed;
183 u32 tx_num_xmit;
184
185 /* Average failed frames percentage error (i.e. actual vs. target
186 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
187 * using using an exponential weighted average technique:
188 *
189 * (RC_PID_SMOOTHING - 1) * err_avg_old + err
190 * err_avg = ------------------------------------------
191 * RC_PID_SMOOTHING
192 *
193 * where err_avg is the new approximation, err_avg_old the previous one
194 * and err is the error w.r.t. to the current failed frames percentage
195 * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
196 * given to the previous estimate, resulting in smoother behavior (i.e.
197 * corresponding to a longer integration window).
198 *
199 * For computation, we actually don't use the above formula, but this
200 * one:
201 *
202 * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
203 *
204 * where:
205 * err_avg_scaled = err * RC_PID_SMOOTHING
206 * err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
207 *
208 * This avoids floating point numbers and the per_failed_old value can
209 * easily be obtained by shifting per_failed_old_scaled right by
210 * RC_PID_SMOOTHING_SHIFT.
211 */
212 s32 err_avg_sc;
213
214 /* Last framed failes percentage sample. */
215 u32 last_pf;
216
217 /* Sharpening needed. */
218 u8 sharp_cnt;
219
220#ifdef CONFIG_MAC80211_DEBUGFS
221 /* Event buffer */
222 struct rc_pid_event_buffer events;
223
224 /* Events debugfs file entry */
225 struct dentry *events_entry;
226#endif
227};
228
229/* Algorithm parameters. We keep them on a per-algorithm approach, so they can
230 * be tuned individually for each interface.
231 */
232struct rc_pid_rateinfo {
233
234 /* Map sorted rates to rates in ieee80211_hw_mode. */
235 int index;
236
237 /* Map rates in ieee80211_hw_mode to sorted rates. */
238 int rev_index;
239
240 /* Did we do any measurement on this rate? */
241 bool valid;
242
243 /* Comparison with the lowest rate. */
244 int diff;
245};
246
247struct rc_pid_info {
248
249 /* The failed frames percentage target. */
250 unsigned int target;
251
252 /* Rate at which failed frames percentage is sampled in 0.001s. */
253 unsigned int sampling_period;
254
255 /* P, I and D coefficients. */
256 int coeff_p;
257 int coeff_i;
258 int coeff_d;
259
260 /* Exponential averaging shift. */
261 unsigned int smoothing_shift;
262
263 /* Sharpening factor and duration. */
264 unsigned int sharpen_factor;
265 unsigned int sharpen_duration;
266
267 /* Normalization offset. */
268 unsigned int norm_offset;
269
270 /* Fast starst parameter. */
271 unsigned int fast_start;
272
273 /* Rates information. */
274 struct rc_pid_rateinfo *rinfo;
275
276 /* Index of the last used rate. */
277 int oldrate;
278
279#ifdef CONFIG_MAC80211_DEBUGFS
280 /* Debugfs entries created for the parameters above. */
281 struct rc_pid_debugfs_entries dentries;
282#endif
283};
284
285#endif /* RC80211_PID_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
new file mode 100644
index 000000000000..554c4baed6fb
--- /dev/null
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -0,0 +1,549 @@
1/*
2 * Copyright 2002-2005, Instant802 Networks, Inc.
3 * Copyright 2005, Devicescape Software, Inc.
4 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
5 * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/netdevice.h>
13#include <linux/types.h>
14#include <linux/skbuff.h>
15#include <linux/debugfs.h>
16#include <net/mac80211.h>
17#include "ieee80211_rate.h"
18
19#include "rc80211_pid.h"
20
21
22/* This is an implementation of a TX rate control algorithm that uses a PID
23 * controller. Given a target failed frames rate, the controller decides about
24 * TX rate changes to meet the target failed frames rate.
25 *
26 * The controller basically computes the following:
27 *
28 * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
29 *
30 * where
31 * adj adjustment value that is used to switch TX rate (see below)
32 * err current error: target vs. current failed frames percentage
33 * last_err last error
34 * err_avg average (i.e. poor man's integral) of recent errors
35 * sharpening non-zero when fast response is needed (i.e. right after
36 * association or no frames sent for a long time), heading
37 * to zero over time
38 * CP Proportional coefficient
39 * CI Integral coefficient
40 * CD Derivative coefficient
41 *
42 * CP, CI, CD are subject to careful tuning.
43 *
44 * The integral component uses a exponential moving average approach instead of
45 * an actual sliding window. The advantage is that we don't need to keep an
46 * array of the last N error values and computation is easier.
47 *
48 * Once we have the adj value, we map it to a rate by means of a learning
49 * algorithm. This algorithm keeps the state of the percentual failed frames
50 * difference between rates. The behaviour of the lowest available rate is kept
51 * as a reference value, and every time we switch between two rates, we compute
52 * the difference between the failed frames each rate exhibited. By doing so,
53 * we compare behaviours which different rates exhibited in adjacent timeslices,
54 * thus the comparison is minimally affected by external conditions. This
55 * difference gets propagated to the whole set of measurements, so that the
56 * reference is always the same. Periodically, we normalize this set so that
57 * recent events weigh the most. By comparing the adj value with this set, we
58 * avoid pejorative switches to lower rates and allow for switches to higher
59 * rates if they behaved well.
60 *
61 * Note that for the computations we use a fixed-point representation to avoid
62 * floating point arithmetic. Hence, all values are shifted left by
63 * RC_PID_ARITH_SHIFT.
64 */
65
66
67/* Shift the adjustment so that we won't switch to a lower rate if it exhibited
68 * a worse failed frames behaviour and we'll choose the highest rate whose
69 * failed frames behaviour is not worse than the one of the original rate
70 * target. While at it, check that the adjustment is within the ranges. Then,
71 * provide the new rate index. */
72static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
73 int adj, int cur, int l)
74{
75 int i, j, k, tmp;
76
77 j = r[cur].rev_index;
78 i = j + adj;
79
80 if (i < 0)
81 return r[0].index;
82 if (i >= l - 1)
83 return r[l - 1].index;
84
85 tmp = i;
86
87 if (adj < 0) {
88 for (k = j; k >= i; k--)
89 if (r[k].diff <= r[j].diff)
90 tmp = k;
91 } else {
92 for (k = i + 1; k + i < l; k++)
93 if (r[k].diff <= r[i].diff)
94 tmp = k;
95 }
96
97 return r[tmp].index;
98}
99
100static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
101 struct sta_info *sta, int adj,
102 struct rc_pid_rateinfo *rinfo)
103{
104 struct ieee80211_sub_if_data *sdata;
105 struct ieee80211_hw_mode *mode;
106 int newidx;
107 int maxrate;
108 int back = (adj > 0) ? 1 : -1;
109
110 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
111
112 mode = local->oper_hw_mode;
113 maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
114
115 newidx = rate_control_pid_shift_adjust(rinfo, adj, sta->txrate,
116 mode->num_rates);
117
118 while (newidx != sta->txrate) {
119 if (rate_supported(sta, mode, newidx) &&
120 (maxrate < 0 || newidx <= maxrate)) {
121 sta->txrate = newidx;
122 break;
123 }
124
125 newidx += back;
126 }
127
128#ifdef CONFIG_MAC80211_DEBUGFS
129 rate_control_pid_event_rate_change(
130 &((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events,
131 newidx, mode->rates[newidx].rate);
132#endif
133}
134
135/* Normalize the failed frames per-rate differences. */
136static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l)
137{
138 int i, norm_offset = pinfo->norm_offset;
139 struct rc_pid_rateinfo *r = pinfo->rinfo;
140
141 if (r[0].diff > norm_offset)
142 r[0].diff -= norm_offset;
143 else if (r[0].diff < -norm_offset)
144 r[0].diff += norm_offset;
145 for (i = 0; i < l - 1; i++)
146 if (r[i + 1].diff > r[i].diff + norm_offset)
147 r[i + 1].diff -= norm_offset;
148 else if (r[i + 1].diff <= r[i].diff)
149 r[i + 1].diff += norm_offset;
150}
151
152static void rate_control_pid_sample(struct rc_pid_info *pinfo,
153 struct ieee80211_local *local,
154 struct sta_info *sta)
155{
156 struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
157 struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
158 struct ieee80211_hw_mode *mode;
159 u32 pf;
160 s32 err_avg;
161 u32 err_prop;
162 u32 err_int;
163 u32 err_der;
164 int adj, i, j, tmp;
165 unsigned long period;
166
167 mode = local->oper_hw_mode;
168 spinfo = sta->rate_ctrl_priv;
169
170 /* In case nothing happened during the previous control interval, turn
171 * the sharpening factor on. */
172 period = (HZ * pinfo->sampling_period + 500) / 1000;
173 if (!period)
174 period = 1;
175 if (jiffies - spinfo->last_sample > 2 * period)
176 spinfo->sharp_cnt = pinfo->sharpen_duration;
177
178 spinfo->last_sample = jiffies;
179
180 /* This should never happen, but in case, we assume the old sample is
181 * still a good measurement and copy it. */
182 if (unlikely(spinfo->tx_num_xmit == 0))
183 pf = spinfo->last_pf;
184 else {
185 pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
186 pf <<= RC_PID_ARITH_SHIFT;
187 }
188
189 spinfo->tx_num_xmit = 0;
190 spinfo->tx_num_failed = 0;
191
192 /* If we just switched rate, update the rate behaviour info. */
193 if (pinfo->oldrate != sta->txrate) {
194
195 i = rinfo[pinfo->oldrate].rev_index;
196 j = rinfo[sta->txrate].rev_index;
197
198 tmp = (pf - spinfo->last_pf);
199 tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
200
201 rinfo[j].diff = rinfo[i].diff + tmp;
202 pinfo->oldrate = sta->txrate;
203 }
204 rate_control_pid_normalize(pinfo, mode->num_rates);
205
206 /* Compute the proportional, integral and derivative errors. */
207 err_prop = (pinfo->target << RC_PID_ARITH_SHIFT) - pf;
208
209 err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift;
210 spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
211 err_int = spinfo->err_avg_sc >> pinfo->smoothing_shift;
212
213 err_der = (pf - spinfo->last_pf) *
214 (1 + pinfo->sharpen_factor * spinfo->sharp_cnt);
215 spinfo->last_pf = pf;
216 if (spinfo->sharp_cnt)
217 spinfo->sharp_cnt--;
218
219#ifdef CONFIG_MAC80211_DEBUGFS
220 rate_control_pid_event_pf_sample(&spinfo->events, pf, err_prop, err_int,
221 err_der);
222#endif
223
224 /* Compute the controller output. */
225 adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
226 + err_der * pinfo->coeff_d);
227 adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
228
229 /* Change rate. */
230 if (adj)
231 rate_control_pid_adjust_rate(local, sta, adj, rinfo);
232}
233
234static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
235 struct sk_buff *skb,
236 struct ieee80211_tx_status *status)
237{
238 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
239 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
240 struct ieee80211_sub_if_data *sdata;
241 struct rc_pid_info *pinfo = priv;
242 struct sta_info *sta;
243 struct rc_pid_sta_info *spinfo;
244 unsigned long period;
245
246 sta = sta_info_get(local, hdr->addr1);
247
248 if (!sta)
249 return;
250
251 /* Don't update the state if we're not controlling the rate. */
252 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
253 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
254 sta->txrate = sdata->bss->max_ratectrl_rateidx;
255 return;
256 }
257
258 /* Ignore all frames that were sent with a different rate than the rate
259 * we currently advise mac80211 to use. */
260 if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
261 goto ignore;
262
263 spinfo = sta->rate_ctrl_priv;
264 spinfo->tx_num_xmit++;
265
266#ifdef CONFIG_MAC80211_DEBUGFS
267 rate_control_pid_event_tx_status(&spinfo->events, status);
268#endif
269
270 /* We count frames that totally failed to be transmitted as two bad
271 * frames, those that made it out but had some retries as one good and
272 * one bad frame. */
273 if (status->excessive_retries) {
274 spinfo->tx_num_failed += 2;
275 spinfo->tx_num_xmit++;
276 } else if (status->retry_count) {
277 spinfo->tx_num_failed++;
278 spinfo->tx_num_xmit++;
279 }
280
281 if (status->excessive_retries) {
282 sta->tx_retry_failed++;
283 sta->tx_num_consecutive_failures++;
284 sta->tx_num_mpdu_fail++;
285 } else {
286 sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
287 sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
288 sta->last_ack_rssi[2] = status->ack_signal;
289 sta->tx_num_consecutive_failures = 0;
290 sta->tx_num_mpdu_ok++;
291 }
292 sta->tx_retry_count += status->retry_count;
293 sta->tx_num_mpdu_fail += status->retry_count;
294
295 /* Update PID controller state. */
296 period = (HZ * pinfo->sampling_period + 500) / 1000;
297 if (!period)
298 period = 1;
299 if (time_after(jiffies, spinfo->last_sample + period))
300 rate_control_pid_sample(pinfo, local, sta);
301
302ignore:
303 sta_info_put(sta);
304}
305
306static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
307 struct ieee80211_hw_mode *mode,
308 struct sk_buff *skb,
309 struct rate_selection *sel)
310{
311 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
312 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
313 struct ieee80211_sub_if_data *sdata;
314 struct sta_info *sta;
315 int rateidx;
316 u16 fc;
317
318 sta = sta_info_get(local, hdr->addr1);
319
320 /* Send management frames and broadcast/multicast data using lowest
321 * rate. */
322 fc = le16_to_cpu(hdr->frame_control);
323 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
324 is_multicast_ether_addr(hdr->addr1) || !sta) {
325 sel->rate = rate_lowest(local, mode, sta);
326 if (sta)
327 sta_info_put(sta);
328 return;
329 }
330
331 /* If a forced rate is in effect, select it. */
332 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
333 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
334 sta->txrate = sdata->bss->force_unicast_rateidx;
335
336 rateidx = sta->txrate;
337
338 if (rateidx >= mode->num_rates)
339 rateidx = mode->num_rates - 1;
340
341 sta->last_txrate = rateidx;
342
343 sta_info_put(sta);
344
345 sel->rate = &mode->rates[rateidx];
346
347#ifdef CONFIG_MAC80211_DEBUGFS
348 rate_control_pid_event_tx_rate(
349 &((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events,
350 rateidx, mode->rates[rateidx].rate);
351#endif
352}
353
354static void rate_control_pid_rate_init(void *priv, void *priv_sta,
355 struct ieee80211_local *local,
356 struct sta_info *sta)
357{
358 /* TODO: This routine should consider using RSSI from previous packets
359 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
360 * Until that method is implemented, we will use the lowest supported
361 * rate as a workaround. */
362 sta->txrate = rate_lowest_index(local, local->oper_hw_mode, sta);
363}
364
365static void *rate_control_pid_alloc(struct ieee80211_local *local)
366{
367 struct rc_pid_info *pinfo;
368 struct rc_pid_rateinfo *rinfo;
369 struct ieee80211_hw_mode *mode;
370 int i, j, tmp;
371 bool s;
372#ifdef CONFIG_MAC80211_DEBUGFS
373 struct rc_pid_debugfs_entries *de;
374#endif
375
376 pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
377 if (!pinfo)
378 return NULL;
379
380 /* We can safely assume that oper_hw_mode won't change unless we get
381 * reinitialized. */
382 mode = local->oper_hw_mode;
383 rinfo = kmalloc(sizeof(*rinfo) * mode->num_rates, GFP_ATOMIC);
384 if (!rinfo) {
385 kfree(pinfo);
386 return NULL;
387 }
388
389 /* Sort the rates. This is optimized for the most common case (i.e.
390 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
391 * mapping too. */
392 for (i = 0; i < mode->num_rates; i++) {
393 rinfo[i].index = i;
394 rinfo[i].rev_index = i;
395 if (pinfo->fast_start)
396 rinfo[i].diff = 0;
397 else
398 rinfo[i].diff = i * pinfo->norm_offset;
399 }
400 for (i = 1; i < mode->num_rates; i++) {
401 s = 0;
402 for (j = 0; j < mode->num_rates - i; j++)
403 if (unlikely(mode->rates[rinfo[j].index].rate >
404 mode->rates[rinfo[j + 1].index].rate)) {
405 tmp = rinfo[j].index;
406 rinfo[j].index = rinfo[j + 1].index;
407 rinfo[j + 1].index = tmp;
408 rinfo[rinfo[j].index].rev_index = j;
409 rinfo[rinfo[j + 1].index].rev_index = j + 1;
410 s = 1;
411 }
412 if (!s)
413 break;
414 }
415
416 pinfo->target = RC_PID_TARGET_PF;
417 pinfo->sampling_period = RC_PID_INTERVAL;
418 pinfo->coeff_p = RC_PID_COEFF_P;
419 pinfo->coeff_i = RC_PID_COEFF_I;
420 pinfo->coeff_d = RC_PID_COEFF_D;
421 pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
422 pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
423 pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
424 pinfo->norm_offset = RC_PID_NORM_OFFSET;
425 pinfo->fast_start = RC_PID_FAST_START;
426 pinfo->rinfo = rinfo;
427 pinfo->oldrate = 0;
428
429#ifdef CONFIG_MAC80211_DEBUGFS
430 de = &pinfo->dentries;
431 de->dir = debugfs_create_dir("rc80211_pid",
432 local->hw.wiphy->debugfsdir);
433 de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
434 de->dir, &pinfo->target);
435 de->sampling_period = debugfs_create_u32("sampling_period",
436 S_IRUSR | S_IWUSR, de->dir,
437 &pinfo->sampling_period);
438 de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR,
439 de->dir, &pinfo->coeff_p);
440 de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR,
441 de->dir, &pinfo->coeff_i);
442 de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR,
443 de->dir, &pinfo->coeff_d);
444 de->smoothing_shift = debugfs_create_u32("smoothing_shift",
445 S_IRUSR | S_IWUSR, de->dir,
446 &pinfo->smoothing_shift);
447 de->sharpen_factor = debugfs_create_u32("sharpen_factor",
448 S_IRUSR | S_IWUSR, de->dir,
449 &pinfo->sharpen_factor);
450 de->sharpen_duration = debugfs_create_u32("sharpen_duration",
451 S_IRUSR | S_IWUSR, de->dir,
452 &pinfo->sharpen_duration);
453 de->norm_offset = debugfs_create_u32("norm_offset",
454 S_IRUSR | S_IWUSR, de->dir,
455 &pinfo->norm_offset);
456 de->fast_start = debugfs_create_bool("fast_start",
457 S_IRUSR | S_IWUSR, de->dir,
458 &pinfo->fast_start);
459#endif
460
461 return pinfo;
462}
463
464static void rate_control_pid_free(void *priv)
465{
466 struct rc_pid_info *pinfo = priv;
467#ifdef CONFIG_MAC80211_DEBUGFS
468 struct rc_pid_debugfs_entries *de = &pinfo->dentries;
469
470 debugfs_remove(de->fast_start);
471 debugfs_remove(de->norm_offset);
472 debugfs_remove(de->sharpen_duration);
473 debugfs_remove(de->sharpen_factor);
474 debugfs_remove(de->smoothing_shift);
475 debugfs_remove(de->coeff_d);
476 debugfs_remove(de->coeff_i);
477 debugfs_remove(de->coeff_p);
478 debugfs_remove(de->sampling_period);
479 debugfs_remove(de->target);
480 debugfs_remove(de->dir);
481#endif
482
483 kfree(pinfo->rinfo);
484 kfree(pinfo);
485}
486
487static void rate_control_pid_clear(void *priv)
488{
489}
490
491static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
492{
493 struct rc_pid_sta_info *spinfo;
494
495 spinfo = kzalloc(sizeof(*spinfo), gfp);
496 if (spinfo == NULL)
497 return NULL;
498
499 spinfo->last_sample = jiffies;
500
501#ifdef CONFIG_MAC80211_DEBUGFS
502 spin_lock_init(&spinfo->events.lock);
503 init_waitqueue_head(&spinfo->events.waitqueue);
504#endif
505
506 return spinfo;
507}
508
509static void rate_control_pid_free_sta(void *priv, void *priv_sta)
510{
511 struct rc_pid_sta_info *spinfo = priv_sta;
512 kfree(spinfo);
513}
514
515static struct rate_control_ops mac80211_rcpid = {
516 .name = "pid",
517 .tx_status = rate_control_pid_tx_status,
518 .get_rate = rate_control_pid_get_rate,
519 .rate_init = rate_control_pid_rate_init,
520 .clear = rate_control_pid_clear,
521 .alloc = rate_control_pid_alloc,
522 .free = rate_control_pid_free,
523 .alloc_sta = rate_control_pid_alloc_sta,
524 .free_sta = rate_control_pid_free_sta,
525#ifdef CONFIG_MAC80211_DEBUGFS
526 .add_sta_debugfs = rate_control_pid_add_sta_debugfs,
527 .remove_sta_debugfs = rate_control_pid_remove_sta_debugfs,
528#endif
529};
530
531MODULE_DESCRIPTION("PID controller based rate control algorithm");
532MODULE_AUTHOR("Stefano Brivio");
533MODULE_AUTHOR("Mattias Nissler");
534MODULE_LICENSE("GPL");
535
536int __init rc80211_pid_init(void)
537{
538 return ieee80211_rate_control_register(&mac80211_rcpid);
539}
540
541void __exit rc80211_pid_exit(void)
542{
543 ieee80211_rate_control_unregister(&mac80211_rcpid);
544}
545
546#ifdef CONFIG_MAC80211_RC_PID_MODULE
547module_init(rc80211_pid_init);
548module_exit(rc80211_pid_exit);
549#endif
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
new file mode 100644
index 000000000000..88b8dc9999bb
--- /dev/null
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/spinlock.h>
10#include <linux/poll.h>
11#include <linux/netdevice.h>
12#include <linux/types.h>
13#include <linux/skbuff.h>
14
15#include <net/mac80211.h>
16#include "ieee80211_rate.h"
17
18#include "rc80211_pid.h"
19
20static void rate_control_pid_event(struct rc_pid_event_buffer *buf,
21 enum rc_pid_event_type type,
22 union rc_pid_event_data *data)
23{
24 struct rc_pid_event *ev;
25 unsigned long status;
26
27 spin_lock_irqsave(&buf->lock, status);
28 ev = &(buf->ring[buf->next_entry]);
29 buf->next_entry = (buf->next_entry + 1) % RC_PID_EVENT_RING_SIZE;
30
31 ev->timestamp = jiffies;
32 ev->id = buf->ev_count++;
33 ev->type = type;
34 ev->data = *data;
35
36 spin_unlock_irqrestore(&buf->lock, status);
37
38 wake_up_all(&buf->waitqueue);
39}
40
41void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
42 struct ieee80211_tx_status *stat)
43{
44 union rc_pid_event_data evd;
45
46 memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status));
47 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
48}
49
50void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
51 int index, int rate)
52{
53 union rc_pid_event_data evd;
54
55 evd.index = index;
56 evd.rate = rate;
57 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_RATE_CHANGE, &evd);
58}
59
60void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
61 int index, int rate)
62{
63 union rc_pid_event_data evd;
64
65 evd.index = index;
66 evd.rate = rate;
67 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_RATE, &evd);
68}
69
70void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
71 s32 pf_sample, s32 prop_err,
72 s32 int_err, s32 der_err)
73{
74 union rc_pid_event_data evd;
75
76 evd.pf_sample = pf_sample;
77 evd.prop_err = prop_err;
78 evd.int_err = int_err;
79 evd.der_err = der_err;
80 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_PF_SAMPLE, &evd);
81}
82
83static int rate_control_pid_events_open(struct inode *inode, struct file *file)
84{
85 struct rc_pid_sta_info *sinfo = inode->i_private;
86 struct rc_pid_event_buffer *events = &sinfo->events;
87 struct rc_pid_events_file_info *file_info;
88 unsigned int status;
89
90 /* Allocate a state struct */
91 file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
92 if (file_info == NULL)
93 return -ENOMEM;
94
95 spin_lock_irqsave(&events->lock, status);
96
97 file_info->next_entry = events->next_entry;
98 file_info->events = events;
99
100 spin_unlock_irqrestore(&events->lock, status);
101
102 file->private_data = file_info;
103
104 return 0;
105}
106
107static int rate_control_pid_events_release(struct inode *inode,
108 struct file *file)
109{
110 struct rc_pid_events_file_info *file_info = file->private_data;
111
112 kfree(file_info);
113
114 return 0;
115}
116
117static unsigned int rate_control_pid_events_poll(struct file *file,
118 poll_table *wait)
119{
120 struct rc_pid_events_file_info *file_info = file->private_data;
121
122 poll_wait(file, &file_info->events->waitqueue, wait);
123
124 return POLLIN | POLLRDNORM;
125}
126
127#define RC_PID_PRINT_BUF_SIZE 64
128
129static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
130 size_t length, loff_t *offset)
131{
132 struct rc_pid_events_file_info *file_info = file->private_data;
133 struct rc_pid_event_buffer *events = file_info->events;
134 struct rc_pid_event *ev;
135 char pb[RC_PID_PRINT_BUF_SIZE];
136 int ret;
137 int p;
138 unsigned int status;
139
140 /* Check if there is something to read. */
141 if (events->next_entry == file_info->next_entry) {
142 if (file->f_flags & O_NONBLOCK)
143 return -EAGAIN;
144
145 /* Wait */
146 ret = wait_event_interruptible(events->waitqueue,
147 events->next_entry != file_info->next_entry);
148
149 if (ret)
150 return ret;
151 }
152
153 /* Write out one event per call. I don't care whether it's a little
154 * inefficient, this is debugging code anyway. */
155 spin_lock_irqsave(&events->lock, status);
156
157 /* Get an event */
158 ev = &(events->ring[file_info->next_entry]);
159 file_info->next_entry = (file_info->next_entry + 1) %
160 RC_PID_EVENT_RING_SIZE;
161
162 /* Print information about the event. Note that userpace needs to
163 * provide large enough buffers. */
164 length = length < RC_PID_PRINT_BUF_SIZE ?
165 length : RC_PID_PRINT_BUF_SIZE;
166 p = snprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
167 switch (ev->type) {
168 case RC_PID_EVENT_TYPE_TX_STATUS:
169 p += snprintf(pb + p, length - p, "tx_status %u %u",
170 ev->data.tx_status.excessive_retries,
171 ev->data.tx_status.retry_count);
172 break;
173 case RC_PID_EVENT_TYPE_RATE_CHANGE:
174 p += snprintf(pb + p, length - p, "rate_change %d %d",
175 ev->data.index, ev->data.rate);
176 break;
177 case RC_PID_EVENT_TYPE_TX_RATE:
178 p += snprintf(pb + p, length - p, "tx_rate %d %d",
179 ev->data.index, ev->data.rate);
180 break;
181 case RC_PID_EVENT_TYPE_PF_SAMPLE:
182 p += snprintf(pb + p, length - p,
183 "pf_sample %d %d %d %d",
184 ev->data.pf_sample, ev->data.prop_err,
185 ev->data.int_err, ev->data.der_err);
186 break;
187 }
188 p += snprintf(pb + p, length - p, "\n");
189
190 spin_unlock_irqrestore(&events->lock, status);
191
192 if (copy_to_user(buf, pb, p))
193 return -EFAULT;
194
195 return p;
196}
197
198#undef RC_PID_PRINT_BUF_SIZE
199
200static struct file_operations rc_pid_fop_events = {
201 .owner = THIS_MODULE,
202 .read = rate_control_pid_events_read,
203 .poll = rate_control_pid_events_poll,
204 .open = rate_control_pid_events_open,
205 .release = rate_control_pid_events_release,
206};
207
208void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
209 struct dentry *dir)
210{
211 struct rc_pid_sta_info *spinfo = priv_sta;
212
213 spinfo->events_entry = debugfs_create_file("rc_pid_events", S_IRUGO,
214 dir, spinfo,
215 &rc_pid_fop_events);
216}
217
218void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta)
219{
220 struct rc_pid_sta_info *spinfo = priv_sta;
221
222 debugfs_remove(spinfo->events_entry);
223}
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index 314b8de88862..934676d687d6 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -7,13 +7,13 @@
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9
10#include <linux/module.h>
11#include <linux/init.h> 10#include <linux/init.h>
12#include <linux/netdevice.h> 11#include <linux/netdevice.h>
13#include <linux/types.h> 12#include <linux/types.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/skbuff.h> 14#include <linux/skbuff.h>
16#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/module.h>
17 17
18#include <net/mac80211.h> 18#include <net/mac80211.h>
19#include "ieee80211_i.h" 19#include "ieee80211_i.h"
@@ -24,13 +24,13 @@
24/* This is a minimal implementation of TX rate controlling that can be used 24/* This is a minimal implementation of TX rate controlling that can be used
25 * as the default when no improved mechanisms are available. */ 25 * as the default when no improved mechanisms are available. */
26 26
27#define RATE_CONTROL_NUM_DOWN 20
28#define RATE_CONTROL_NUM_UP 15
27 29
28#define RATE_CONTROL_EMERG_DEC 2 30#define RATE_CONTROL_EMERG_DEC 2
29#define RATE_CONTROL_INTERVAL (HZ / 20) 31#define RATE_CONTROL_INTERVAL (HZ / 20)
30#define RATE_CONTROL_MIN_TX 10 32#define RATE_CONTROL_MIN_TX 10
31 33
32MODULE_ALIAS("rc80211_default");
33
34static void rate_control_rate_inc(struct ieee80211_local *local, 34static void rate_control_rate_inc(struct ieee80211_local *local,
35 struct sta_info *sta) 35 struct sta_info *sta)
36{ 36{
@@ -90,26 +90,6 @@ static void rate_control_rate_dec(struct ieee80211_local *local,
90 } 90 }
91} 91}
92 92
93
94static struct ieee80211_rate *
95rate_control_lowest_rate(struct ieee80211_local *local,
96 struct ieee80211_hw_mode *mode)
97{
98 int i;
99
100 for (i = 0; i < mode->num_rates; i++) {
101 struct ieee80211_rate *rate = &mode->rates[i];
102
103 if (rate->flags & IEEE80211_RATE_SUPPORTED)
104 return rate;
105 }
106
107 printk(KERN_DEBUG "rate_control_lowest_rate - no supported rates "
108 "found\n");
109 return &mode->rates[0];
110}
111
112
113struct global_rate_control { 93struct global_rate_control {
114 int dummy; 94 int dummy;
115}; 95};
@@ -219,35 +199,33 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
219} 199}
220 200
221 201
222static struct ieee80211_rate * 202static void
223rate_control_simple_get_rate(void *priv, struct net_device *dev, 203rate_control_simple_get_rate(void *priv, struct net_device *dev,
204 struct ieee80211_hw_mode *mode,
224 struct sk_buff *skb, 205 struct sk_buff *skb,
225 struct rate_control_extra *extra) 206 struct rate_selection *sel)
226{ 207{
227 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 208 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
228 struct ieee80211_sub_if_data *sdata;
229 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 209 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
230 struct ieee80211_hw_mode *mode = extra->mode; 210 struct ieee80211_sub_if_data *sdata;
231 struct sta_info *sta; 211 struct sta_info *sta;
232 int rateidx, nonerp_idx; 212 int rateidx;
233 u16 fc; 213 u16 fc;
234 214
235 memset(extra, 0, sizeof(*extra)); 215 sta = sta_info_get(local, hdr->addr1);
236 216
217 /* Send management frames and broadcast/multicast data using lowest
218 * rate. */
237 fc = le16_to_cpu(hdr->frame_control); 219 fc = le16_to_cpu(hdr->frame_control);
238 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || 220 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
239 (hdr->addr1[0] & 0x01)) { 221 is_multicast_ether_addr(hdr->addr1) || !sta) {
240 /* Send management frames and broadcast/multicast data using 222 sel->rate = rate_lowest(local, mode, sta);
241 * lowest rate. */ 223 if (sta)
242 /* TODO: this could probably be improved.. */ 224 sta_info_put(sta);
243 return rate_control_lowest_rate(local, mode); 225 return;
244 } 226 }
245 227
246 sta = sta_info_get(local, hdr->addr1); 228 /* If a forced rate is in effect, select it. */
247
248 if (!sta)
249 return rate_control_lowest_rate(local, mode);
250
251 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 229 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
252 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) 230 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
253 sta->txrate = sdata->bss->force_unicast_rateidx; 231 sta->txrate = sdata->bss->force_unicast_rateidx;
@@ -258,17 +236,10 @@ rate_control_simple_get_rate(void *priv, struct net_device *dev,
258 rateidx = mode->num_rates - 1; 236 rateidx = mode->num_rates - 1;
259 237
260 sta->last_txrate = rateidx; 238 sta->last_txrate = rateidx;
261 nonerp_idx = rateidx;
262 while (nonerp_idx > 0 &&
263 ((mode->rates[nonerp_idx].flags & IEEE80211_RATE_ERP) ||
264 !(mode->rates[nonerp_idx].flags & IEEE80211_RATE_SUPPORTED) ||
265 !(sta->supp_rates & BIT(nonerp_idx))))
266 nonerp_idx--;
267 extra->nonerp = &mode->rates[nonerp_idx];
268 239
269 sta_info_put(sta); 240 sta_info_put(sta);
270 241
271 return &mode->rates[rateidx]; 242 sel->rate = &mode->rates[rateidx];
272} 243}
273 244
274 245
@@ -394,8 +365,7 @@ static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)
394} 365}
395#endif 366#endif
396 367
397static struct rate_control_ops rate_control_simple = { 368static struct rate_control_ops mac80211_rcsimple = {
398 .module = THIS_MODULE,
399 .name = "simple", 369 .name = "simple",
400 .tx_status = rate_control_simple_tx_status, 370 .tx_status = rate_control_simple_tx_status,
401 .get_rate = rate_control_simple_get_rate, 371 .get_rate = rate_control_simple_get_rate,
@@ -411,21 +381,20 @@ static struct rate_control_ops rate_control_simple = {
411#endif 381#endif
412}; 382};
413 383
384MODULE_LICENSE("GPL");
385MODULE_DESCRIPTION("Simple rate control algorithm");
414 386
415static int __init rate_control_simple_init(void) 387int __init rc80211_simple_init(void)
416{ 388{
417 return ieee80211_rate_control_register(&rate_control_simple); 389 return ieee80211_rate_control_register(&mac80211_rcsimple);
418} 390}
419 391
420 392void __exit rc80211_simple_exit(void)
421static void __exit rate_control_simple_exit(void)
422{ 393{
423 ieee80211_rate_control_unregister(&rate_control_simple); 394 ieee80211_rate_control_unregister(&mac80211_rcsimple);
424} 395}
425 396
426 397#ifdef CONFIG_MAC80211_RC_SIMPLE_MODULE
427subsys_initcall(rate_control_simple_init); 398module_init(rc80211_simple_init);
428module_exit(rate_control_simple_exit); 399module_exit(rc80211_simple_exit);
429 400#endif
430MODULE_DESCRIPTION("Simple rate control algorithm for ieee80211");
431MODULE_LICENSE("GPL");
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ece77766ea2b..d44c87269bcb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -24,6 +24,10 @@
24#include "tkip.h" 24#include "tkip.h"
25#include "wme.h" 25#include "wme.h"
26 26
27u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
28 struct tid_ampdu_rx *tid_agg_rx,
29 struct sk_buff *skb, u16 mpdu_seq_num,
30 int bar_req);
27/* 31/*
28 * monitor mode reception 32 * monitor mode reception
29 * 33 *
@@ -61,8 +65,12 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
61 return 1; 65 return 1;
62 if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) 66 if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len))
63 return 1; 67 return 1;
64 if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == 68 if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
65 cpu_to_le16(IEEE80211_FTYPE_CTL)) 69 cpu_to_le16(IEEE80211_FTYPE_CTL)) &&
70 ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
71 cpu_to_le16(IEEE80211_STYPE_PSPOLL)) &&
72 ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
73 cpu_to_le16(IEEE80211_STYPE_BACK_REQ)))
66 return 1; 74 return 1;
67 return 0; 75 return 0;
68} 76}
@@ -79,8 +87,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
79 struct ieee80211_sub_if_data *sdata; 87 struct ieee80211_sub_if_data *sdata;
80 struct ieee80211_rate *rate; 88 struct ieee80211_rate *rate;
81 int needed_headroom = 0; 89 int needed_headroom = 0;
82 struct ieee80211_rtap_hdr { 90 struct ieee80211_radiotap_header *rthdr;
83 struct ieee80211_radiotap_header hdr; 91 __le64 *rttsft = NULL;
92 struct ieee80211_rtap_fixed_data {
84 u8 flags; 93 u8 flags;
85 u8 rate; 94 u8 rate;
86 __le16 chan_freq; 95 __le16 chan_freq;
@@ -88,7 +97,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
88 u8 antsignal; 97 u8 antsignal;
89 u8 padding_for_rxflags; 98 u8 padding_for_rxflags;
90 __le16 rx_flags; 99 __le16 rx_flags;
91 } __attribute__ ((packed)) *rthdr; 100 } __attribute__ ((packed)) *rtfixed;
92 struct sk_buff *skb, *skb2; 101 struct sk_buff *skb, *skb2;
93 struct net_device *prev_dev = NULL; 102 struct net_device *prev_dev = NULL;
94 int present_fcs_len = 0; 103 int present_fcs_len = 0;
@@ -105,7 +114,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
105 if (status->flag & RX_FLAG_RADIOTAP) 114 if (status->flag & RX_FLAG_RADIOTAP)
106 rtap_len = ieee80211_get_radiotap_len(origskb->data); 115 rtap_len = ieee80211_get_radiotap_len(origskb->data);
107 else 116 else
108 needed_headroom = sizeof(*rthdr); 117 /* room for radiotap header, always present fields and TSFT */
118 needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8;
109 119
110 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) 120 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
111 present_fcs_len = FCS_LEN; 121 present_fcs_len = FCS_LEN;
@@ -133,7 +143,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
133 * them allocate enough headroom to start with. 143 * them allocate enough headroom to start with.
134 */ 144 */
135 if (skb_headroom(skb) < needed_headroom && 145 if (skb_headroom(skb) < needed_headroom &&
136 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { 146 pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) {
137 dev_kfree_skb(skb); 147 dev_kfree_skb(skb);
138 return NULL; 148 return NULL;
139 } 149 }
@@ -152,45 +162,59 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
152 162
153 /* if necessary, prepend radiotap information */ 163 /* if necessary, prepend radiotap information */
154 if (!(status->flag & RX_FLAG_RADIOTAP)) { 164 if (!(status->flag & RX_FLAG_RADIOTAP)) {
165 rtfixed = (void *) skb_push(skb, sizeof(*rtfixed));
166 rtap_len = sizeof(*rthdr) + sizeof(*rtfixed);
167 if (status->flag & RX_FLAG_TSFT) {
168 rttsft = (void *) skb_push(skb, sizeof(*rttsft));
169 rtap_len += 8;
170 }
155 rthdr = (void *) skb_push(skb, sizeof(*rthdr)); 171 rthdr = (void *) skb_push(skb, sizeof(*rthdr));
156 memset(rthdr, 0, sizeof(*rthdr)); 172 memset(rthdr, 0, sizeof(*rthdr));
157 rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); 173 memset(rtfixed, 0, sizeof(*rtfixed));
158 rthdr->hdr.it_present = 174 rthdr->it_present =
159 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | 175 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
160 (1 << IEEE80211_RADIOTAP_RATE) | 176 (1 << IEEE80211_RADIOTAP_RATE) |
161 (1 << IEEE80211_RADIOTAP_CHANNEL) | 177 (1 << IEEE80211_RADIOTAP_CHANNEL) |
162 (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | 178 (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) |
163 (1 << IEEE80211_RADIOTAP_RX_FLAGS)); 179 (1 << IEEE80211_RADIOTAP_RX_FLAGS));
164 rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? 180 rtfixed->flags = 0;
165 IEEE80211_RADIOTAP_F_FCS : 0; 181 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
182 rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS;
183
184 if (rttsft) {
185 *rttsft = cpu_to_le64(status->mactime);
186 rthdr->it_present |=
187 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
188 }
166 189
167 /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ 190 /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */
168 rthdr->rx_flags = 0; 191 rtfixed->rx_flags = 0;
169 if (status->flag & 192 if (status->flag &
170 (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) 193 (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
171 rthdr->rx_flags |= 194 rtfixed->rx_flags |=
172 cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); 195 cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
173 196
174 rate = ieee80211_get_rate(local, status->phymode, 197 rate = ieee80211_get_rate(local, status->phymode,
175 status->rate); 198 status->rate);
176 if (rate) 199 if (rate)
177 rthdr->rate = rate->rate / 5; 200 rtfixed->rate = rate->rate / 5;
178 201
179 rthdr->chan_freq = cpu_to_le16(status->freq); 202 rtfixed->chan_freq = cpu_to_le16(status->freq);
180 203
181 if (status->phymode == MODE_IEEE80211A) 204 if (status->phymode == MODE_IEEE80211A)
182 rthdr->chan_flags = 205 rtfixed->chan_flags =
183 cpu_to_le16(IEEE80211_CHAN_OFDM | 206 cpu_to_le16(IEEE80211_CHAN_OFDM |
184 IEEE80211_CHAN_5GHZ); 207 IEEE80211_CHAN_5GHZ);
185 else 208 else
186 rthdr->chan_flags = 209 rtfixed->chan_flags =
187 cpu_to_le16(IEEE80211_CHAN_DYN | 210 cpu_to_le16(IEEE80211_CHAN_DYN |
188 IEEE80211_CHAN_2GHZ); 211 IEEE80211_CHAN_2GHZ);
189 212
190 rthdr->antsignal = status->ssi; 213 rtfixed->antsignal = status->ssi;
214 rthdr->it_len = cpu_to_le16(rtap_len);
191 } 215 }
192 216
193 skb_set_mac_header(skb, 0); 217 skb_reset_mac_header(skb);
194 skb->ip_summed = CHECKSUM_UNNECESSARY; 218 skb->ip_summed = CHECKSUM_UNNECESSARY;
195 skb->pkt_type = PACKET_OTHERHOST; 219 skb->pkt_type = PACKET_OTHERHOST;
196 skb->protocol = htons(ETH_P_802_2); 220 skb->protocol = htons(ETH_P_802_2);
@@ -199,7 +223,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
199 if (!netif_running(sdata->dev)) 223 if (!netif_running(sdata->dev))
200 continue; 224 continue;
201 225
202 if (sdata->type != IEEE80211_IF_TYPE_MNTR) 226 if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR)
203 continue; 227 continue;
204 228
205 if (prev_dev) { 229 if (prev_dev) {
@@ -243,6 +267,10 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
243 u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; 267 u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN;
244 /* frame has qos control */ 268 /* frame has qos control */
245 tid = qc[0] & QOS_CONTROL_TID_MASK; 269 tid = qc[0] & QOS_CONTROL_TID_MASK;
270 if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
271 rx->flags |= IEEE80211_TXRXD_RX_AMSDU;
272 else
273 rx->flags &= ~IEEE80211_TXRXD_RX_AMSDU;
246 } else { 274 } else {
247 if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { 275 if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) {
248 /* Separate TID for management frames */ 276 /* Separate TID for management frames */
@@ -266,11 +294,11 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
266 return TXRX_CONTINUE; 294 return TXRX_CONTINUE;
267} 295}
268 296
269static ieee80211_txrx_result 297
270ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) 298static u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
299 struct sk_buff *skb,
300 struct ieee80211_rx_status *status)
271{ 301{
272 struct ieee80211_local *local = rx->local;
273 struct sk_buff *skb = rx->skb;
274 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 302 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
275 u32 load = 0, hdrtime; 303 u32 load = 0, hdrtime;
276 struct ieee80211_rate *rate; 304 struct ieee80211_rate *rate;
@@ -284,7 +312,7 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
284 312
285 rate = &mode->rates[0]; 313 rate = &mode->rates[0];
286 for (i = 0; i < mode->num_rates; i++) { 314 for (i = 0; i < mode->num_rates; i++) {
287 if (mode->rates[i].val == rx->u.rx.status->rate) { 315 if (mode->rates[i].val == status->rate) {
288 rate = &mode->rates[i]; 316 rate = &mode->rates[i];
289 break; 317 break;
290 } 318 }
@@ -308,8 +336,38 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
308 336
309 /* Divide channel_use by 8 to avoid wrapping around the counter */ 337 /* Divide channel_use by 8 to avoid wrapping around the counter */
310 load >>= CHAN_UTIL_SHIFT; 338 load >>= CHAN_UTIL_SHIFT;
311 local->channel_use_raw += load; 339
312 rx->u.rx.load = load; 340 return load;
341}
342
343static ieee80211_txrx_result
344ieee80211_rx_h_verify_ip_alignment(struct ieee80211_txrx_data *rx)
345{
346 int hdrlen;
347
348 /*
349 * Drivers are required to align the payload data in a way that
350 * guarantees that the contained IP header is aligned to a four-
351 * byte boundary. In the case of regular frames, this simply means
352 * aligning the payload to a four-byte boundary (because either
353 * the IP header is directly contained, or IV/RFC1042 headers that
354 * have a length divisible by four are in front of it.
355 *
356 * With A-MSDU frames, however, the payload data address must
357 * yield two modulo four because there are 14-byte 802.3 headers
358 * within the A-MSDU frames that push the IP header further back
359 * to a multiple of four again. Thankfully, the specs were sane
360 * enough this time around to require padding each A-MSDU subframe
361 * to a length that is a multiple of four.
362 *
363 * Padding like atheros hardware adds which is inbetween the 802.11
364 * header and the payload is not supported, the driver is required
365 * to move the 802.11 header further back in that case.
366 */
367 hdrlen = ieee80211_get_hdrlen(rx->fc);
368 if (rx->flags & IEEE80211_TXRXD_RX_AMSDU)
369 hdrlen += ETH_HLEN;
370 WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3);
313 371
314 return TXRX_CONTINUE; 372 return TXRX_CONTINUE;
315} 373}
@@ -317,7 +375,7 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
317ieee80211_rx_handler ieee80211_rx_pre_handlers[] = 375ieee80211_rx_handler ieee80211_rx_pre_handlers[] =
318{ 376{
319 ieee80211_rx_h_parse_qos, 377 ieee80211_rx_h_parse_qos,
320 ieee80211_rx_h_load_stats, 378 ieee80211_rx_h_verify_ip_alignment,
321 NULL 379 NULL
322}; 380};
323 381
@@ -338,8 +396,14 @@ ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx)
338 struct ieee80211_local *local = rx->local; 396 struct ieee80211_local *local = rx->local;
339 struct sk_buff *skb = rx->skb; 397 struct sk_buff *skb = rx->skb;
340 398
341 if (unlikely(local->sta_scanning != 0)) { 399 if (unlikely(local->sta_hw_scanning))
342 ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); 400 return ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status);
401
402 if (unlikely(local->sta_sw_scanning)) {
403 /* drop all the other packets during a software scan anyway */
404 if (ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status)
405 != TXRX_QUEUED)
406 dev_kfree_skb(skb);
343 return TXRX_QUEUED; 407 return TXRX_QUEUED;
344 } 408 }
345 409
@@ -377,18 +441,6 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
377 return TXRX_DROP; 441 return TXRX_DROP;
378 } 442 }
379 443
380 if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
381 rx->skb->pkt_type = PACKET_OTHERHOST;
382 else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0)
383 rx->skb->pkt_type = PACKET_HOST;
384 else if (is_multicast_ether_addr(hdr->addr1)) {
385 if (is_broadcast_ether_addr(hdr->addr1))
386 rx->skb->pkt_type = PACKET_BROADCAST;
387 else
388 rx->skb->pkt_type = PACKET_MULTICAST;
389 } else
390 rx->skb->pkt_type = PACKET_OTHERHOST;
391
392 /* Drop disallowed frame classes based on STA auth/assoc state; 444 /* Drop disallowed frame classes based on STA auth/assoc state;
393 * IEEE 802.11, Chap 5.5. 445 * IEEE 802.11, Chap 5.5.
394 * 446 *
@@ -400,7 +452,7 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
400 if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || 452 if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA ||
401 ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && 453 ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
402 (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && 454 (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
403 rx->sdata->type != IEEE80211_IF_TYPE_IBSS && 455 rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
404 (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { 456 (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) {
405 if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && 457 if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
406 !(rx->fc & IEEE80211_FCTL_TODS) && 458 !(rx->fc & IEEE80211_FCTL_TODS) &&
@@ -509,9 +561,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_txrx_data *rx)
509 rx->key->tx_rx_count++; 561 rx->key->tx_rx_count++;
510 /* TODO: add threshold stuff again */ 562 /* TODO: add threshold stuff again */
511 } else { 563 } else {
564#ifdef CONFIG_MAC80211_DEBUG
512 if (net_ratelimit()) 565 if (net_ratelimit())
513 printk(KERN_DEBUG "%s: RX protected frame," 566 printk(KERN_DEBUG "%s: RX protected frame,"
514 " but have no key\n", rx->dev->name); 567 " but have no key\n", rx->dev->name);
568#endif /* CONFIG_MAC80211_DEBUG */
515 return TXRX_DROP; 569 return TXRX_DROP;
516 } 570 }
517 571
@@ -618,13 +672,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx)
618 /* Update last_rx only for IBSS packets which are for the current 672 /* Update last_rx only for IBSS packets which are for the current
619 * BSSID to avoid keeping the current IBSS network alive in cases where 673 * BSSID to avoid keeping the current IBSS network alive in cases where
620 * other STAs are using different BSSID. */ 674 * other STAs are using different BSSID. */
621 if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { 675 if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
622 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); 676 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
677 IEEE80211_IF_TYPE_IBSS);
623 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) 678 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0)
624 sta->last_rx = jiffies; 679 sta->last_rx = jiffies;
625 } else 680 } else
626 if (!is_multicast_ether_addr(hdr->addr1) || 681 if (!is_multicast_ether_addr(hdr->addr1) ||
627 rx->sdata->type == IEEE80211_IF_TYPE_STA) { 682 rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) {
628 /* Update last_rx only for unicast frames in order to prevent 683 /* Update last_rx only for unicast frames in order to prevent
629 * the Probe Request frames (the only broadcast frames from a 684 * the Probe Request frames (the only broadcast frames from a
630 * STA in infrastructure mode) from keeping a connection alive. 685 * STA in infrastructure mode) from keeping a connection alive.
@@ -868,6 +923,7 @@ ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx)
868static ieee80211_txrx_result 923static ieee80211_txrx_result
869ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) 924ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
870{ 925{
926 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
871 struct sk_buff *skb; 927 struct sk_buff *skb;
872 int no_pending_pkts; 928 int no_pending_pkts;
873 DECLARE_MAC_BUF(mac); 929 DECLARE_MAC_BUF(mac);
@@ -878,6 +934,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
878 !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))) 934 !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)))
879 return TXRX_CONTINUE; 935 return TXRX_CONTINUE;
880 936
937 if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) &&
938 (sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
939 return TXRX_DROP;
940
881 skb = skb_dequeue(&rx->sta->tx_filtered); 941 skb = skb_dequeue(&rx->sta->tx_filtered);
882 if (!skb) { 942 if (!skb) {
883 skb = skb_dequeue(&rx->sta->ps_tx_buf); 943 skb = skb_dequeue(&rx->sta->ps_tx_buf);
@@ -954,68 +1014,54 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx)
954 return TXRX_CONTINUE; 1014 return TXRX_CONTINUE;
955} 1015}
956 1016
957static ieee80211_txrx_result 1017static int
958ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) 1018ieee80211_802_1x_port_control(struct ieee80211_txrx_data *rx)
959{ 1019{
960 if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && 1020 if (unlikely(rx->sdata->ieee802_1x_pac &&
961 rx->sdata->type != IEEE80211_IF_TYPE_STA && 1021 (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)))) {
962 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
963 return TXRX_CONTINUE;
964
965 if (unlikely(rx->sdata->ieee802_1x &&
966 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
967 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
968 (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) &&
969 !ieee80211_is_eapol(rx->skb))) {
970#ifdef CONFIG_MAC80211_DEBUG 1022#ifdef CONFIG_MAC80211_DEBUG
971 struct ieee80211_hdr *hdr = 1023 printk(KERN_DEBUG "%s: dropped frame "
972 (struct ieee80211_hdr *) rx->skb->data; 1024 "(unauthorized port)\n", rx->dev->name);
973 DECLARE_MAC_BUF(mac);
974 printk(KERN_DEBUG "%s: dropped frame from %s"
975 " (unauthorized port)\n", rx->dev->name,
976 print_mac(mac, hdr->addr2));
977#endif /* CONFIG_MAC80211_DEBUG */ 1025#endif /* CONFIG_MAC80211_DEBUG */
978 return TXRX_DROP; 1026 return -EACCES;
979 } 1027 }
980 1028
981 return TXRX_CONTINUE; 1029 return 0;
982} 1030}
983 1031
984static ieee80211_txrx_result 1032static int
985ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) 1033ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx)
986{ 1034{
987 /* 1035 /*
988 * Pass through unencrypted frames if the hardware has 1036 * Pass through unencrypted frames if the hardware has
989 * decrypted them already. 1037 * decrypted them already.
990 */ 1038 */
991 if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) 1039 if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED)
992 return TXRX_CONTINUE; 1040 return 0;
993 1041
994 /* Drop unencrypted frames if key is set. */ 1042 /* Drop unencrypted frames if key is set. */
995 if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && 1043 if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) &&
996 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && 1044 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
997 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && 1045 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
998 rx->sdata->drop_unencrypted && 1046 (rx->key || rx->sdata->drop_unencrypted))) {
999 (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) {
1000 if (net_ratelimit()) 1047 if (net_ratelimit())
1001 printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " 1048 printk(KERN_DEBUG "%s: RX non-WEP frame, but expected "
1002 "encryption\n", rx->dev->name); 1049 "encryption\n", rx->dev->name);
1003 return TXRX_DROP; 1050 return -EACCES;
1004 } 1051 }
1005 return TXRX_CONTINUE; 1052 return 0;
1006} 1053}
1007 1054
1008static ieee80211_txrx_result 1055static int
1009ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) 1056ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
1010{ 1057{
1011 struct net_device *dev = rx->dev; 1058 struct net_device *dev = rx->dev;
1012 struct ieee80211_local *local = rx->local;
1013 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; 1059 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
1014 u16 fc, hdrlen, ethertype; 1060 u16 fc, hdrlen, ethertype;
1015 u8 *payload; 1061 u8 *payload;
1016 u8 dst[ETH_ALEN]; 1062 u8 dst[ETH_ALEN];
1017 u8 src[ETH_ALEN]; 1063 u8 src[ETH_ALEN];
1018 struct sk_buff *skb = rx->skb, *skb2; 1064 struct sk_buff *skb = rx->skb;
1019 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1065 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1020 DECLARE_MAC_BUF(mac); 1066 DECLARE_MAC_BUF(mac);
1021 DECLARE_MAC_BUF(mac2); 1067 DECLARE_MAC_BUF(mac2);
@@ -1023,11 +1069,9 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1023 DECLARE_MAC_BUF(mac4); 1069 DECLARE_MAC_BUF(mac4);
1024 1070
1025 fc = rx->fc; 1071 fc = rx->fc;
1026 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1027 return TXRX_CONTINUE;
1028 1072
1029 if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) 1073 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1030 return TXRX_DROP; 1074 return -1;
1031 1075
1032 hdrlen = ieee80211_get_hdrlen(fc); 1076 hdrlen = ieee80211_get_hdrlen(fc);
1033 1077
@@ -1047,8 +1091,8 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1047 memcpy(dst, hdr->addr3, ETH_ALEN); 1091 memcpy(dst, hdr->addr3, ETH_ALEN);
1048 memcpy(src, hdr->addr2, ETH_ALEN); 1092 memcpy(src, hdr->addr2, ETH_ALEN);
1049 1093
1050 if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && 1094 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
1051 sdata->type != IEEE80211_IF_TYPE_VLAN)) { 1095 sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) {
1052 if (net_ratelimit()) 1096 if (net_ratelimit())
1053 printk(KERN_DEBUG "%s: dropped ToDS frame " 1097 printk(KERN_DEBUG "%s: dropped ToDS frame "
1054 "(BSSID=%s SA=%s DA=%s)\n", 1098 "(BSSID=%s SA=%s DA=%s)\n",
@@ -1056,7 +1100,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1056 print_mac(mac, hdr->addr1), 1100 print_mac(mac, hdr->addr1),
1057 print_mac(mac2, hdr->addr2), 1101 print_mac(mac2, hdr->addr2),
1058 print_mac(mac3, hdr->addr3)); 1102 print_mac(mac3, hdr->addr3));
1059 return TXRX_DROP; 1103 return -1;
1060 } 1104 }
1061 break; 1105 break;
1062 case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 1106 case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -1064,7 +1108,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1064 memcpy(dst, hdr->addr3, ETH_ALEN); 1108 memcpy(dst, hdr->addr3, ETH_ALEN);
1065 memcpy(src, hdr->addr4, ETH_ALEN); 1109 memcpy(src, hdr->addr4, ETH_ALEN);
1066 1110
1067 if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { 1111 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS)) {
1068 if (net_ratelimit()) 1112 if (net_ratelimit())
1069 printk(KERN_DEBUG "%s: dropped FromDS&ToDS " 1113 printk(KERN_DEBUG "%s: dropped FromDS&ToDS "
1070 "frame (RA=%s TA=%s DA=%s SA=%s)\n", 1114 "frame (RA=%s TA=%s DA=%s SA=%s)\n",
@@ -1073,7 +1117,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1073 print_mac(mac2, hdr->addr2), 1117 print_mac(mac2, hdr->addr2),
1074 print_mac(mac3, hdr->addr3), 1118 print_mac(mac3, hdr->addr3),
1075 print_mac(mac4, hdr->addr4)); 1119 print_mac(mac4, hdr->addr4));
1076 return TXRX_DROP; 1120 return -1;
1077 } 1121 }
1078 break; 1122 break;
1079 case IEEE80211_FCTL_FROMDS: 1123 case IEEE80211_FCTL_FROMDS:
@@ -1081,17 +1125,17 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1081 memcpy(dst, hdr->addr1, ETH_ALEN); 1125 memcpy(dst, hdr->addr1, ETH_ALEN);
1082 memcpy(src, hdr->addr3, ETH_ALEN); 1126 memcpy(src, hdr->addr3, ETH_ALEN);
1083 1127
1084 if (sdata->type != IEEE80211_IF_TYPE_STA || 1128 if (sdata->vif.type != IEEE80211_IF_TYPE_STA ||
1085 (is_multicast_ether_addr(dst) && 1129 (is_multicast_ether_addr(dst) &&
1086 !compare_ether_addr(src, dev->dev_addr))) 1130 !compare_ether_addr(src, dev->dev_addr)))
1087 return TXRX_DROP; 1131 return -1;
1088 break; 1132 break;
1089 case 0: 1133 case 0:
1090 /* DA SA BSSID */ 1134 /* DA SA BSSID */
1091 memcpy(dst, hdr->addr1, ETH_ALEN); 1135 memcpy(dst, hdr->addr1, ETH_ALEN);
1092 memcpy(src, hdr->addr2, ETH_ALEN); 1136 memcpy(src, hdr->addr2, ETH_ALEN);
1093 1137
1094 if (sdata->type != IEEE80211_IF_TYPE_IBSS) { 1138 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
1095 if (net_ratelimit()) { 1139 if (net_ratelimit()) {
1096 printk(KERN_DEBUG "%s: dropped IBSS frame " 1140 printk(KERN_DEBUG "%s: dropped IBSS frame "
1097 "(DA=%s SA=%s BSSID=%s)\n", 1141 "(DA=%s SA=%s BSSID=%s)\n",
@@ -1100,21 +1144,20 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1100 print_mac(mac2, hdr->addr2), 1144 print_mac(mac2, hdr->addr2),
1101 print_mac(mac3, hdr->addr3)); 1145 print_mac(mac3, hdr->addr3));
1102 } 1146 }
1103 return TXRX_DROP; 1147 return -1;
1104 } 1148 }
1105 break; 1149 break;
1106 } 1150 }
1107 1151
1108 payload = skb->data + hdrlen;
1109
1110 if (unlikely(skb->len - hdrlen < 8)) { 1152 if (unlikely(skb->len - hdrlen < 8)) {
1111 if (net_ratelimit()) { 1153 if (net_ratelimit()) {
1112 printk(KERN_DEBUG "%s: RX too short data frame " 1154 printk(KERN_DEBUG "%s: RX too short data frame "
1113 "payload\n", dev->name); 1155 "payload\n", dev->name);
1114 } 1156 }
1115 return TXRX_DROP; 1157 return -1;
1116 } 1158 }
1117 1159
1160 payload = skb->data + hdrlen;
1118 ethertype = (payload[6] << 8) | payload[7]; 1161 ethertype = (payload[6] << 8) | payload[7];
1119 1162
1120 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && 1163 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
@@ -1128,6 +1171,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1128 } else { 1171 } else {
1129 struct ethhdr *ehdr; 1172 struct ethhdr *ehdr;
1130 __be16 len; 1173 __be16 len;
1174
1131 skb_pull(skb, hdrlen); 1175 skb_pull(skb, hdrlen);
1132 len = htons(skb->len); 1176 len = htons(skb->len);
1133 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); 1177 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
@@ -1135,36 +1179,72 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1135 memcpy(ehdr->h_source, src, ETH_ALEN); 1179 memcpy(ehdr->h_source, src, ETH_ALEN);
1136 ehdr->h_proto = len; 1180 ehdr->h_proto = len;
1137 } 1181 }
1138 skb->dev = dev; 1182 return 0;
1183}
1184
1185/*
1186 * requires that rx->skb is a frame with ethernet header
1187 */
1188static bool ieee80211_frame_allowed(struct ieee80211_txrx_data *rx)
1189{
1190 static const u8 pae_group_addr[ETH_ALEN]
1191 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
1192 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1139 1193
1140 skb2 = NULL; 1194 /*
1195 * Allow EAPOL frames to us/the PAE group address regardless
1196 * of whether the frame was encrypted or not.
1197 */
1198 if (ehdr->h_proto == htons(ETH_P_PAE) &&
1199 (compare_ether_addr(ehdr->h_dest, rx->dev->dev_addr) == 0 ||
1200 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1201 return true;
1141 1202
1142 dev->stats.rx_packets++; 1203 if (ieee80211_802_1x_port_control(rx) ||
1143 dev->stats.rx_bytes += skb->len; 1204 ieee80211_drop_unencrypted(rx))
1205 return false;
1206
1207 return true;
1208}
1144 1209
1145 if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP 1210/*
1146 || sdata->type == IEEE80211_IF_TYPE_VLAN) && 1211 * requires that rx->skb is a frame with ethernet header
1212 */
1213static void
1214ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
1215{
1216 struct net_device *dev = rx->dev;
1217 struct ieee80211_local *local = rx->local;
1218 struct sk_buff *skb, *xmit_skb;
1219 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1220 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1221 struct sta_info *dsta;
1222
1223 skb = rx->skb;
1224 xmit_skb = NULL;
1225
1226 if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP ||
1227 sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
1147 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { 1228 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) {
1148 if (is_multicast_ether_addr(skb->data)) { 1229 if (is_multicast_ether_addr(ehdr->h_dest)) {
1149 /* send multicast frames both to higher layers in 1230 /*
1150 * local net stack and back to the wireless media */ 1231 * send multicast frames both to higher layers in
1151 skb2 = skb_copy(skb, GFP_ATOMIC); 1232 * local net stack and back to the wireless medium
1152 if (!skb2 && net_ratelimit()) 1233 */
1234 xmit_skb = skb_copy(skb, GFP_ATOMIC);
1235 if (!xmit_skb && net_ratelimit())
1153 printk(KERN_DEBUG "%s: failed to clone " 1236 printk(KERN_DEBUG "%s: failed to clone "
1154 "multicast frame\n", dev->name); 1237 "multicast frame\n", dev->name);
1155 } else { 1238 } else {
1156 struct sta_info *dsta;
1157 dsta = sta_info_get(local, skb->data); 1239 dsta = sta_info_get(local, skb->data);
1158 if (dsta && !dsta->dev) { 1240 if (dsta && dsta->dev == dev) {
1159 if (net_ratelimit()) 1241 /*
1160 printk(KERN_DEBUG "Station with null " 1242 * The destination station is associated to
1161 "dev structure!\n"); 1243 * this AP (in this VLAN), so send the frame
1162 } else if (dsta && dsta->dev == dev) { 1244 * directly to it and do not pass it to local
1163 /* Destination station is associated to this 1245 * net stack.
1164 * AP, so send the frame directly to it and
1165 * do not pass the frame to local net stack.
1166 */ 1246 */
1167 skb2 = skb; 1247 xmit_skb = skb;
1168 skb = NULL; 1248 skb = NULL;
1169 } 1249 }
1170 if (dsta) 1250 if (dsta)
@@ -1179,18 +1259,207 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1179 netif_rx(skb); 1259 netif_rx(skb);
1180 } 1260 }
1181 1261
1182 if (skb2) { 1262 if (xmit_skb) {
1183 /* send to wireless media */ 1263 /* send to wireless media */
1184 skb2->protocol = __constant_htons(ETH_P_802_3); 1264 xmit_skb->protocol = htons(ETH_P_802_3);
1185 skb_set_network_header(skb2, 0); 1265 skb_reset_network_header(xmit_skb);
1186 skb_set_mac_header(skb2, 0); 1266 skb_reset_mac_header(xmit_skb);
1187 dev_queue_xmit(skb2); 1267 dev_queue_xmit(xmit_skb);
1268 }
1269}
1270
1271static ieee80211_txrx_result
1272ieee80211_rx_h_amsdu(struct ieee80211_txrx_data *rx)
1273{
1274 struct net_device *dev = rx->dev;
1275 struct ieee80211_local *local = rx->local;
1276 u16 fc, ethertype;
1277 u8 *payload;
1278 struct sk_buff *skb = rx->skb, *frame = NULL;
1279 const struct ethhdr *eth;
1280 int remaining, err;
1281 u8 dst[ETH_ALEN];
1282 u8 src[ETH_ALEN];
1283 DECLARE_MAC_BUF(mac);
1284
1285 fc = rx->fc;
1286 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1287 return TXRX_CONTINUE;
1288
1289 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1290 return TXRX_DROP;
1291
1292 if (!(rx->flags & IEEE80211_TXRXD_RX_AMSDU))
1293 return TXRX_CONTINUE;
1294
1295 err = ieee80211_data_to_8023(rx);
1296 if (unlikely(err))
1297 return TXRX_DROP;
1298
1299 skb->dev = dev;
1300
1301 dev->stats.rx_packets++;
1302 dev->stats.rx_bytes += skb->len;
1303
1304 /* skip the wrapping header */
1305 eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
1306 if (!eth)
1307 return TXRX_DROP;
1308
1309 while (skb != frame) {
1310 u8 padding;
1311 __be16 len = eth->h_proto;
1312 unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len);
1313
1314 remaining = skb->len;
1315 memcpy(dst, eth->h_dest, ETH_ALEN);
1316 memcpy(src, eth->h_source, ETH_ALEN);
1317
1318 padding = ((4 - subframe_len) & 0x3);
1319 /* the last MSDU has no padding */
1320 if (subframe_len > remaining) {
1321 printk(KERN_DEBUG "%s: wrong buffer size", dev->name);
1322 return TXRX_DROP;
1323 }
1324
1325 skb_pull(skb, sizeof(struct ethhdr));
1326 /* if last subframe reuse skb */
1327 if (remaining <= subframe_len + padding)
1328 frame = skb;
1329 else {
1330 frame = dev_alloc_skb(local->hw.extra_tx_headroom +
1331 subframe_len);
1332
1333 if (frame == NULL)
1334 return TXRX_DROP;
1335
1336 skb_reserve(frame, local->hw.extra_tx_headroom +
1337 sizeof(struct ethhdr));
1338 memcpy(skb_put(frame, ntohs(len)), skb->data,
1339 ntohs(len));
1340
1341 eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
1342 padding);
1343 if (!eth) {
1344 printk(KERN_DEBUG "%s: wrong buffer size ",
1345 dev->name);
1346 dev_kfree_skb(frame);
1347 return TXRX_DROP;
1348 }
1349 }
1350
1351 skb_reset_network_header(frame);
1352 frame->dev = dev;
1353 frame->priority = skb->priority;
1354 rx->skb = frame;
1355
1356 payload = frame->data;
1357 ethertype = (payload[6] << 8) | payload[7];
1358
1359 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
1360 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
1361 compare_ether_addr(payload,
1362 bridge_tunnel_header) == 0)) {
1363 /* remove RFC1042 or Bridge-Tunnel
1364 * encapsulation and replace EtherType */
1365 skb_pull(frame, 6);
1366 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
1367 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
1368 } else {
1369 memcpy(skb_push(frame, sizeof(__be16)),
1370 &len, sizeof(__be16));
1371 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
1372 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
1373 }
1374
1375 if (!ieee80211_frame_allowed(rx)) {
1376 if (skb == frame) /* last frame */
1377 return TXRX_DROP;
1378 dev_kfree_skb(frame);
1379 continue;
1380 }
1381
1382 ieee80211_deliver_skb(rx);
1188 } 1383 }
1189 1384
1190 return TXRX_QUEUED; 1385 return TXRX_QUEUED;
1191} 1386}
1192 1387
1193static ieee80211_txrx_result 1388static ieee80211_txrx_result
1389ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1390{
1391 struct net_device *dev = rx->dev;
1392 u16 fc;
1393 int err;
1394
1395 fc = rx->fc;
1396 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1397 return TXRX_CONTINUE;
1398
1399 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1400 return TXRX_DROP;
1401
1402 err = ieee80211_data_to_8023(rx);
1403 if (unlikely(err))
1404 return TXRX_DROP;
1405
1406 if (!ieee80211_frame_allowed(rx))
1407 return TXRX_DROP;
1408
1409 rx->skb->dev = dev;
1410
1411 dev->stats.rx_packets++;
1412 dev->stats.rx_bytes += rx->skb->len;
1413
1414 ieee80211_deliver_skb(rx);
1415
1416 return TXRX_QUEUED;
1417}
1418
1419static ieee80211_txrx_result
1420ieee80211_rx_h_ctrl(struct ieee80211_txrx_data *rx)
1421{
1422 struct ieee80211_local *local = rx->local;
1423 struct ieee80211_hw *hw = &local->hw;
1424 struct sk_buff *skb = rx->skb;
1425 struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data;
1426 struct tid_ampdu_rx *tid_agg_rx;
1427 u16 start_seq_num;
1428 u16 tid;
1429
1430 if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL))
1431 return TXRX_CONTINUE;
1432
1433 if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) {
1434 if (!rx->sta)
1435 return TXRX_CONTINUE;
1436 tid = le16_to_cpu(bar->control) >> 12;
1437 tid_agg_rx = &(rx->sta->ampdu_mlme.tid_rx[tid]);
1438 if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
1439 return TXRX_CONTINUE;
1440
1441 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
1442
1443 /* reset session timer */
1444 if (tid_agg_rx->timeout) {
1445 unsigned long expires =
1446 jiffies + (tid_agg_rx->timeout / 1000) * HZ;
1447 mod_timer(&tid_agg_rx->session_timer, expires);
1448 }
1449
1450 /* manage reordering buffer according to requested */
1451 /* sequence number */
1452 rcu_read_lock();
1453 ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL,
1454 start_seq_num, 1);
1455 rcu_read_unlock();
1456 return TXRX_DROP;
1457 }
1458
1459 return TXRX_CONTINUE;
1460}
1461
1462static ieee80211_txrx_result
1194ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) 1463ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
1195{ 1464{
1196 struct ieee80211_sub_if_data *sdata; 1465 struct ieee80211_sub_if_data *sdata;
@@ -1199,8 +1468,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
1199 return TXRX_DROP; 1468 return TXRX_DROP;
1200 1469
1201 sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1470 sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1202 if ((sdata->type == IEEE80211_IF_TYPE_STA || 1471 if ((sdata->vif.type == IEEE80211_IF_TYPE_STA ||
1203 sdata->type == IEEE80211_IF_TYPE_IBSS) && 1472 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) &&
1204 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) 1473 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
1205 ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); 1474 ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status);
1206 else 1475 else
@@ -1292,7 +1561,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
1292 goto ignore; 1561 goto ignore;
1293 } 1562 }
1294 1563
1295 if (rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) { 1564 if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
1296 /* 1565 /*
1297 * APs with pairwise keys should never receive Michael MIC 1566 * APs with pairwise keys should never receive Michael MIC
1298 * errors for non-zero keyidx because these are reserved for 1567 * errors for non-zero keyidx because these are reserved for
@@ -1339,9 +1608,9 @@ ieee80211_rx_handler ieee80211_rx_handlers[] =
1339 * are not passed to user space by these functions 1608 * are not passed to user space by these functions
1340 */ 1609 */
1341 ieee80211_rx_h_remove_qos_control, 1610 ieee80211_rx_h_remove_qos_control,
1342 ieee80211_rx_h_802_1x_pae, 1611 ieee80211_rx_h_amsdu,
1343 ieee80211_rx_h_drop_unencrypted,
1344 ieee80211_rx_h_data, 1612 ieee80211_rx_h_data,
1613 ieee80211_rx_h_ctrl,
1345 ieee80211_rx_h_mgmt, 1614 ieee80211_rx_h_mgmt,
1346 NULL 1615 NULL
1347}; 1616};
@@ -1354,7 +1623,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1354{ 1623{
1355 int multicast = is_multicast_ether_addr(hdr->addr1); 1624 int multicast = is_multicast_ether_addr(hdr->addr1);
1356 1625
1357 switch (sdata->type) { 1626 switch (sdata->vif.type) {
1358 case IEEE80211_IF_TYPE_STA: 1627 case IEEE80211_IF_TYPE_STA:
1359 if (!bssid) 1628 if (!bssid)
1360 return 0; 1629 return 0;
@@ -1425,11 +1694,13 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1425} 1694}
1426 1695
1427/* 1696/*
1428 * This is the receive path handler. It is called by a low level driver when an 1697 * This is the actual Rx frames handler. As it belongs to the Rx path it must
1429 * 802.11 MPDU is received from the hardware. 1698 * be called with rcu_read_lock protection.
1430 */ 1699 */
1431void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, 1700static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1432 struct ieee80211_rx_status *status) 1701 struct sk_buff *skb,
1702 struct ieee80211_rx_status *status,
1703 u32 load)
1433{ 1704{
1434 struct ieee80211_local *local = hw_to_local(hw); 1705 struct ieee80211_local *local = hw_to_local(hw);
1435 struct ieee80211_sub_if_data *sdata; 1706 struct ieee80211_sub_if_data *sdata;
@@ -1437,36 +1708,18 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1437 struct ieee80211_hdr *hdr; 1708 struct ieee80211_hdr *hdr;
1438 struct ieee80211_txrx_data rx; 1709 struct ieee80211_txrx_data rx;
1439 u16 type; 1710 u16 type;
1440 int prepres; 1711 int prepares;
1441 struct ieee80211_sub_if_data *prev = NULL; 1712 struct ieee80211_sub_if_data *prev = NULL;
1442 struct sk_buff *skb_new; 1713 struct sk_buff *skb_new;
1443 u8 *bssid; 1714 u8 *bssid;
1444 1715
1445 /*
1446 * key references and virtual interfaces are protected using RCU
1447 * and this requires that we are in a read-side RCU section during
1448 * receive processing
1449 */
1450 rcu_read_lock();
1451
1452 /*
1453 * Frames with failed FCS/PLCP checksum are not returned,
1454 * all other frames are returned without radiotap header
1455 * if it was previously present.
1456 * Also, frames with less than 16 bytes are dropped.
1457 */
1458 skb = ieee80211_rx_monitor(local, skb, status);
1459 if (!skb) {
1460 rcu_read_unlock();
1461 return;
1462 }
1463
1464 hdr = (struct ieee80211_hdr *) skb->data; 1716 hdr = (struct ieee80211_hdr *) skb->data;
1465 memset(&rx, 0, sizeof(rx)); 1717 memset(&rx, 0, sizeof(rx));
1466 rx.skb = skb; 1718 rx.skb = skb;
1467 rx.local = local; 1719 rx.local = local;
1468 1720
1469 rx.u.rx.status = status; 1721 rx.u.rx.status = status;
1722 rx.u.rx.load = load;
1470 rx.fc = le16_to_cpu(hdr->frame_control); 1723 rx.fc = le16_to_cpu(hdr->frame_control);
1471 type = rx.fc & IEEE80211_FCTL_FTYPE; 1724 type = rx.fc & IEEE80211_FCTL_FTYPE;
1472 1725
@@ -1484,7 +1737,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1484 goto end; 1737 goto end;
1485 } 1738 }
1486 1739
1487 if (unlikely(local->sta_scanning)) 1740 if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning))
1488 rx.flags |= IEEE80211_TXRXD_RXIN_SCAN; 1741 rx.flags |= IEEE80211_TXRXD_RXIN_SCAN;
1489 1742
1490 if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, 1743 if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx,
@@ -1499,25 +1752,23 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1499 ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, 1752 ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx,
1500 rx.sta); 1753 rx.sta);
1501 sta_info_put(sta); 1754 sta_info_put(sta);
1502 rcu_read_unlock();
1503 return; 1755 return;
1504 } 1756 }
1505 1757
1506 bssid = ieee80211_get_bssid(hdr, skb->len);
1507
1508 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 1758 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
1509 if (!netif_running(sdata->dev)) 1759 if (!netif_running(sdata->dev))
1510 continue; 1760 continue;
1511 1761
1512 if (sdata->type == IEEE80211_IF_TYPE_MNTR) 1762 if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR)
1513 continue; 1763 continue;
1514 1764
1765 bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
1515 rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; 1766 rx.flags |= IEEE80211_TXRXD_RXRA_MATCH;
1516 prepres = prepare_for_handlers(sdata, bssid, &rx, hdr); 1767 prepares = prepare_for_handlers(sdata, bssid, &rx, hdr);
1517 /* prepare_for_handlers can change sta */ 1768 /* prepare_for_handlers can change sta */
1518 sta = rx.sta; 1769 sta = rx.sta;
1519 1770
1520 if (!prepres) 1771 if (!prepares)
1521 continue; 1772 continue;
1522 1773
1523 /* 1774 /*
@@ -1545,6 +1796,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1545 prev->dev->name); 1796 prev->dev->name);
1546 continue; 1797 continue;
1547 } 1798 }
1799 rx.fc = le16_to_cpu(hdr->frame_control);
1548 rx.skb = skb_new; 1800 rx.skb = skb_new;
1549 rx.dev = prev->dev; 1801 rx.dev = prev->dev;
1550 rx.sdata = prev; 1802 rx.sdata = prev;
@@ -1553,6 +1805,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1553 prev = sdata; 1805 prev = sdata;
1554 } 1806 }
1555 if (prev) { 1807 if (prev) {
1808 rx.fc = le16_to_cpu(hdr->frame_control);
1556 rx.skb = skb; 1809 rx.skb = skb;
1557 rx.dev = prev->dev; 1810 rx.dev = prev->dev;
1558 rx.sdata = prev; 1811 rx.sdata = prev;
@@ -1562,10 +1815,230 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1562 dev_kfree_skb(skb); 1815 dev_kfree_skb(skb);
1563 1816
1564 end: 1817 end:
1565 rcu_read_unlock(); 1818 if (sta)
1819 sta_info_put(sta);
1820}
1821
1822#define SEQ_MODULO 0x1000
1823#define SEQ_MASK 0xfff
1824
1825static inline int seq_less(u16 sq1, u16 sq2)
1826{
1827 return (((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1));
1828}
1829
1830static inline u16 seq_inc(u16 sq)
1831{
1832 return ((sq + 1) & SEQ_MASK);
1833}
1834
1835static inline u16 seq_sub(u16 sq1, u16 sq2)
1836{
1837 return ((sq1 - sq2) & SEQ_MASK);
1838}
1839
1840
/*
 * Feed one MPDU (or a BAR: @skb == NULL, @bar_req != 0) into the Rx
 * aggregation reordering buffer of @tid_agg_rx.
 *
 * As this function belongs to the Rx path it must be called with the
 * proper rcu_read_lock protection for its flow.
 *
 * Returns 1 when the frame was consumed here (dropped as stale or
 * duplicate, stored for reordering, or released together with other
 * buffered frames - and always for a BAR); returns 0 when the frame is
 * in order with nothing buffered, i.e. the caller should process @skb
 * itself.
 */
1845u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
1846				    struct tid_ampdu_rx *tid_agg_rx,
1847				    struct sk_buff *skb, u16 mpdu_seq_num,
1848				    int bar_req)
1849{
1850	struct ieee80211_local *local = hw_to_local(hw);
1851	struct ieee80211_rx_status status;
1852	u16 head_seq_num, buf_size;
1853	int index;
1854	u32 pkt_load;
1855
1856	buf_size = tid_agg_rx->buf_size;
1857	head_seq_num = tid_agg_rx->head_seq_num;
1566 1858
1859	/* frame with out of date sequence number */
1860	if (seq_less(mpdu_seq_num, head_seq_num)) {
		/* dev_kfree_skb(NULL) is a no-op, so the BAR path is safe */
1861		dev_kfree_skb(skb);
1862		return 1;
1863	}
1864
1865	/* if frame sequence number exceeds our buffering window size or
1866	 * block Ack Request arrived - release stored frames */
1867	if ((!seq_less(mpdu_seq_num, head_seq_num + buf_size)) || (bar_req)) {
1868		/* new head to the ordering buffer */
1869		if (bar_req)
1870			head_seq_num = mpdu_seq_num;
1871		else
1872			head_seq_num =
1873				seq_inc(seq_sub(mpdu_seq_num, buf_size));
1874		/* release stored frames up to new head to stack */
1875		while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
			/* buffer slots are indexed relative to the session's
			 * starting sequence number (ssn) */
1876			index = seq_sub(tid_agg_rx->head_seq_num,
1877				tid_agg_rx->ssn)
1878				% tid_agg_rx->buf_size;
1879
1880			if (tid_agg_rx->reorder_buf[index]) {
1881				/* release the reordered frames to stack */
1882				memcpy(&status,
1883					tid_agg_rx->reorder_buf[index]->cb,
1884					sizeof(status));
1885				pkt_load = ieee80211_rx_load_stats(local,
1886					tid_agg_rx->reorder_buf[index],
1887					&status);
1888				__ieee80211_rx_handle_packet(hw,
1889					tid_agg_rx->reorder_buf[index],
1890					&status, pkt_load);
1891				tid_agg_rx->stored_mpdu_num--;
1892				tid_agg_rx->reorder_buf[index] = NULL;
1893			}
1894			tid_agg_rx->head_seq_num =
1895				seq_inc(tid_agg_rx->head_seq_num);
1896		}
1897		if (bar_req)
1898			return 1;
1899	}
1900
1901	/* now the new frame is always in the range of the reordering */
1902	/* buffer window */
1903	index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn)
1904						% tid_agg_rx->buf_size;
1905	/* check if we already stored this frame */
1906	if (tid_agg_rx->reorder_buf[index]) {
		/* duplicate - drop it */
1907		dev_kfree_skb(skb);
1908		return 1;
1909	}
1910
1911	/* if arrived mpdu is in the right order and nothing else stored */
1912	/* release it immediately */
1913	if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
1914			tid_agg_rx->stored_mpdu_num == 0) {
1915		tid_agg_rx->head_seq_num =
1916			seq_inc(tid_agg_rx->head_seq_num);
		/* caller processes the skb itself */
1917		return 0;
1918	}
1919
1920	/* put the frame in the reordering buffer */
1921	tid_agg_rx->reorder_buf[index] = skb;
1922	tid_agg_rx->stored_mpdu_num++;
1923	/* release the buffer until next missing frame */
1924	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn)
1925						% tid_agg_rx->buf_size;
1926	while (tid_agg_rx->reorder_buf[index]) {
1927		/* release the reordered frame back to stack */
1928		memcpy(&status, tid_agg_rx->reorder_buf[index]->cb,
1929			sizeof(status));
1930		pkt_load = ieee80211_rx_load_stats(local,
1931			tid_agg_rx->reorder_buf[index],
1932			&status);
1933		__ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index],
1934			&status, pkt_load);
1935		tid_agg_rx->stored_mpdu_num--;
1936		tid_agg_rx->reorder_buf[index] = NULL;
1937		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
1938		index = seq_sub(tid_agg_rx->head_seq_num,
1939			tid_agg_rx->ssn) % tid_agg_rx->buf_size;
1940	}
1941	return 1;
1942}
1943
/*
 * Check an incoming frame against any active Rx A-MPDU session for the
 * transmitting STA/TID and, if one is operational, run the frame through
 * the reordering buffer.
 *
 * Returns non-zero when the frame was consumed by the reorder machinery
 * (the caller must not process it further), 0 when the caller should
 * handle the frame normally.
 */
1944static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
1945					struct sk_buff *skb)
1946{
1947	struct ieee80211_hw *hw = &local->hw;
1948	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
1949	struct sta_info *sta;
1950	struct tid_ampdu_rx *tid_agg_rx;
1951	u16 fc, sc;
1952	u16 mpdu_seq_num;
1953	u8 ret = 0, *qc;
1954	int tid;
1955
	/* takes a reference on sta; dropped at end_reorder */
1956	sta = sta_info_get(local, hdr->addr2);
1957	if (!sta)
1958		return ret;
1959
1960	fc = le16_to_cpu(hdr->frame_control);
1961
1962	/* filter the QoS data rx stream according to
1963	 * STA/TID and check if this STA/TID is on aggregation */
1964	if (!WLAN_FC_IS_QOS_DATA(fc))
1965		goto end_reorder;
1966
	/* QoS control field is the last 2 bytes of the 802.11 header */
1967	qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN;
1968	tid = qc[0] & QOS_CONTROL_TID_MASK;
1969	tid_agg_rx = &(sta->ampdu_mlme.tid_rx[tid]);
1970
1971	if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
1972		goto end_reorder;
1973
1974	/* null data frames are excluded */
	/* NOTE(review): tests the NULLFUNC subtype bit without masking
	 * IEEE80211_FCTL_STYPE, so it matches any data subtype with that
	 * bit set (e.g. QoS Null) - presumably intentional here since only
	 * QoS data frames reach this point; confirm */
1975	if (unlikely(fc & IEEE80211_STYPE_NULLFUNC))
1976		goto end_reorder;
1977
1978	/* new un-ordered ampdu frame - process it */
1979
1980	/* reset session timer */
1981	if (tid_agg_rx->timeout) {
1982		unsigned long expires =
1983			jiffies + (tid_agg_rx->timeout / 1000) * HZ;
1984		mod_timer(&tid_agg_rx->session_timer, expires);
1985	}
1986
1987	/* if this mpdu is fragmented - terminate rx aggregation session */
1988	sc = le16_to_cpu(hdr->seq_ctrl);
1989	if (sc & IEEE80211_SCTL_FRAG) {
1990		ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr,
1991			tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
		/* NOTE(review): ret = 1 tells the caller the skb was
		 * consumed, but the skb is neither freed nor buffered on
		 * this path - looks like a potential leak; confirm */
1992		ret = 1;
1993		goto end_reorder;
1994	}
1995
1996	/* according to mpdu sequence number deal with reordering buffer */
1997	mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
1998	ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb,
1999						mpdu_seq_num, 0);
2000end_reorder:
1567	if (sta) 2001	if (sta)
1568		sta_info_put(sta); 2002		sta_info_put(sta);
2003	return ret;
2004}
2005
2006/*
2007 * This is the receive path handler. It is called by a low level driver when an
2008 * 802.11 MPDU is received from the hardware.
2009 */
2010void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
2011		    struct ieee80211_rx_status *status)
2012{
2013	struct ieee80211_local *local = hw_to_local(hw);
2014	u32 pkt_load;
2015
2016	/*
2017	 * key references and virtual interfaces are protected using RCU
2018	 * and this requires that we are in a read-side RCU section during
2019	 * receive processing
2020	 */
2021	rcu_read_lock();
2022
2023	/*
2024	 * Frames with failed FCS/PLCP checksum are not returned,
2025	 * all other frames are returned without radiotap header
2026	 * if it was previously present.
2027	 * Also, frames with less than 16 bytes are dropped.
2028	 */
2029	skb = ieee80211_rx_monitor(local, skb, status);
2030	if (!skb) {
2031		rcu_read_unlock();
2032		return;
2033	}
2034
	/* account this frame's airtime in the raw channel-use counter */
2035	pkt_load = ieee80211_rx_load_stats(local, skb, status);
2036	local->channel_use_raw += pkt_load;
2037
	/*
	 * The A-MPDU reorder handler returns non-zero when it consumed the
	 * skb (buffered, released via the reorder buffer, or dropped);
	 * only process the frame directly when it did not.
	 */
2038	if (!ieee80211_rx_reorder_ampdu(local, skb))
2039		__ieee80211_rx_handle_packet(hw, skb, status, pkt_load);
2040
2041	rcu_read_unlock();
1569} 2042}
1570EXPORT_SYMBOL(__ieee80211_rx); 2043EXPORT_SYMBOL(__ieee80211_rx);
1571 2044
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index e8491554a5dc..1f74bd296357 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h> 16#include <linux/if_arp.h>
17#include <linux/timer.h>
17 18
18#include <net/mac80211.h> 19#include <net/mac80211.h>
19#include "ieee80211_i.h" 20#include "ieee80211_i.h"
@@ -103,6 +104,7 @@ static void sta_info_release(struct kref *kref)
103 struct sta_info *sta = container_of(kref, struct sta_info, kref); 104 struct sta_info *sta = container_of(kref, struct sta_info, kref);
104 struct ieee80211_local *local = sta->local; 105 struct ieee80211_local *local = sta->local;
105 struct sk_buff *skb; 106 struct sk_buff *skb;
107 int i;
106 108
107 /* free sta structure; it has already been removed from 109 /* free sta structure; it has already been removed from
108 * hash table etc. external structures. Make sure that all 110 * hash table etc. external structures. Make sure that all
@@ -115,6 +117,8 @@ static void sta_info_release(struct kref *kref)
115 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { 117 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
116 dev_kfree_skb_any(skb); 118 dev_kfree_skb_any(skb);
117 } 119 }
120 for (i = 0; i < STA_TID_NUM; i++)
121 del_timer_sync(&sta->ampdu_mlme.tid_rx[i].session_timer);
118 rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); 122 rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv);
119 rate_control_put(sta->rate_ctrl); 123 rate_control_put(sta->rate_ctrl);
120 kfree(sta); 124 kfree(sta);
@@ -132,6 +136,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
132 struct net_device *dev, u8 *addr, gfp_t gfp) 136 struct net_device *dev, u8 *addr, gfp_t gfp)
133{ 137{
134 struct sta_info *sta; 138 struct sta_info *sta;
139 int i;
135 DECLARE_MAC_BUF(mac); 140 DECLARE_MAC_BUF(mac);
136 141
137 sta = kzalloc(sizeof(*sta), gfp); 142 sta = kzalloc(sizeof(*sta), gfp);
@@ -151,6 +156,19 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
151 memcpy(sta->addr, addr, ETH_ALEN); 156 memcpy(sta->addr, addr, ETH_ALEN);
152 sta->local = local; 157 sta->local = local;
153 sta->dev = dev; 158 sta->dev = dev;
159 spin_lock_init(&sta->ampdu_mlme.ampdu_rx);
160 for (i = 0; i < STA_TID_NUM; i++) {
161 /* timer_to_tid must be initialized with identity mapping to
162 * enable session_timer's data differentiation. refer to
163 * sta_rx_agg_session_timer_expired for usage */
164 sta->timer_to_tid[i] = i;
165 /* rx timers */
166 sta->ampdu_mlme.tid_rx[i].session_timer.function =
167 sta_rx_agg_session_timer_expired;
168 sta->ampdu_mlme.tid_rx[i].session_timer.data =
169 (unsigned long)&sta->timer_to_tid[i];
170 init_timer(&sta->ampdu_mlme.tid_rx[i].session_timer);
171 }
154 skb_queue_head_init(&sta->ps_tx_buf); 172 skb_queue_head_init(&sta->ps_tx_buf);
155 skb_queue_head_init(&sta->tx_filtered); 173 skb_queue_head_init(&sta->tx_filtered);
156 __sta_info_get(sta); /* sta used by caller, decremented by 174 __sta_info_get(sta); /* sta used by caller, decremented by
@@ -159,9 +177,16 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
159 list_add(&sta->list, &local->sta_list); 177 list_add(&sta->list, &local->sta_list);
160 local->num_sta++; 178 local->num_sta++;
161 sta_info_hash_add(local, sta); 179 sta_info_hash_add(local, sta);
162 if (local->ops->sta_notify) 180 if (local->ops->sta_notify) {
163 local->ops->sta_notify(local_to_hw(local), dev->ifindex, 181 struct ieee80211_sub_if_data *sdata;
164 STA_NOTIFY_ADD, addr); 182
183 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
184 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
185 sdata = sdata->u.vlan.ap;
186
187 local->ops->sta_notify(local_to_hw(local), &sdata->vif,
188 STA_NOTIFY_ADD, addr);
189 }
165 write_unlock_bh(&local->sta_lock); 190 write_unlock_bh(&local->sta_lock);
166 191
167#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 192#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -229,9 +254,17 @@ void sta_info_free(struct sta_info *sta)
229 ieee80211_key_free(sta->key); 254 ieee80211_key_free(sta->key);
230 sta->key = NULL; 255 sta->key = NULL;
231 256
232 if (local->ops->sta_notify) 257 if (local->ops->sta_notify) {
233 local->ops->sta_notify(local_to_hw(local), sta->dev->ifindex, 258 struct ieee80211_sub_if_data *sdata;
234 STA_NOTIFY_REMOVE, sta->addr); 259
260 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
261
262 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
263 sdata = sdata->u.vlan.ap;
264
265 local->ops->sta_notify(local_to_hw(local), &sdata->vif,
266 STA_NOTIFY_REMOVE, sta->addr);
267 }
235 268
236 rate_control_remove_sta_debugfs(sta); 269 rate_control_remove_sta_debugfs(sta);
237 ieee80211_sta_debugfs_remove(sta); 270 ieee80211_sta_debugfs_remove(sta);
@@ -306,7 +339,8 @@ static void sta_info_cleanup(unsigned long data)
306 } 339 }
307 read_unlock_bh(&local->sta_lock); 340 read_unlock_bh(&local->sta_lock);
308 341
309 local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; 342 local->sta_cleanup.expires =
343 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
310 add_timer(&local->sta_cleanup); 344 add_timer(&local->sta_cleanup);
311} 345}
312 346
@@ -344,10 +378,10 @@ void sta_info_init(struct ieee80211_local *local)
344 rwlock_init(&local->sta_lock); 378 rwlock_init(&local->sta_lock);
345 INIT_LIST_HEAD(&local->sta_list); 379 INIT_LIST_HEAD(&local->sta_list);
346 380
347 init_timer(&local->sta_cleanup); 381 setup_timer(&local->sta_cleanup, sta_info_cleanup,
348 local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; 382 (unsigned long)local);
349 local->sta_cleanup.data = (unsigned long) local; 383 local->sta_cleanup.expires =
350 local->sta_cleanup.function = sta_info_cleanup; 384 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
351 385
352#ifdef CONFIG_MAC80211_DEBUGFS 386#ifdef CONFIG_MAC80211_DEBUGFS
353 INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task); 387 INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 8f7ebe41c024..96fe3ed95038 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -31,6 +31,51 @@
31#define WLAN_STA_WME BIT(9) 31#define WLAN_STA_WME BIT(9)
32#define WLAN_STA_WDS BIT(27) 32#define WLAN_STA_WDS BIT(27)
33 33
34#define STA_TID_NUM 16
35#define ADDBA_RESP_INTERVAL HZ
36
37#define HT_AGG_STATE_INITIATOR_SHIFT (4)
38
39#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3)
40
41#define HT_AGG_STATE_IDLE (0x0)
42#define HT_AGG_STATE_OPERATIONAL (0x7)
43
44/**
45 * struct tid_ampdu_rx - TID aggregation information (Rx).
46 *
47 * @state: TID's state in session state machine.
48 * @dialog_token: dialog token for aggregation session
49 * @ssn: Starting Sequence Number expected to be aggregated.
50 * @buf_size: buffer size for incoming A-MPDUs
51 * @timeout: reset timer value.
52 * @head_seq_num: head sequence number in reordering buffer.
53 * @stored_mpdu_num: number of MPDUs in reordering buffer
54 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
55 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
56 */
57struct tid_ampdu_rx {
58	u8 state;
59	u8 dialog_token;
60	u16 ssn;
61	u16 buf_size;
	/* NOTE(review): rx path computes (timeout / 1000) * HZ, which
	 * implies milliseconds - confirm (802.11 expresses BA timeouts
	 * in TUs) */
62	u16 timeout;
63	u16 head_seq_num;
64	u16 stored_mpdu_num;
	/* array of buf_size slots, indexed by (seq - ssn) % buf_size */
65	struct sk_buff **reorder_buf;
66	struct timer_list session_timer;
67};
68
69/**
70 * struct sta_ampdu_mlme - STA aggregation information.
71 *
72 * @tid_rx: aggregation info for Rx per TID
73 * @ampdu_rx: for locking sections in aggregation Rx flow
74 */
75struct sta_ampdu_mlme {
76	struct tid_ampdu_rx tid_rx[STA_TID_NUM];
77	spinlock_t ampdu_rx;
78};
34 79
35struct sta_info { 80struct sta_info {
36 struct kref kref; 81 struct kref kref;
@@ -99,6 +144,11 @@ struct sta_info {
99 144
100 u16 listen_interval; 145 u16 listen_interval;
101 146
147 struct ieee80211_ht_info ht_info; /* 802.11n HT capabilities
148 of this STA */
149 struct sta_ampdu_mlme ampdu_mlme;
150 u8 timer_to_tid[STA_TID_NUM]; /* convert timer id to tid */
151
102#ifdef CONFIG_MAC80211_DEBUGFS 152#ifdef CONFIG_MAC80211_DEBUGFS
103 struct sta_info_debugfsdentries { 153 struct sta_info_debugfsdentries {
104 struct dentry *dir; 154 struct dentry *dir;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a531543bccb..67b509edd431 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -176,7 +176,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
176 * to closest integer */ 176 * to closest integer */
177 177
178 dur = ieee80211_frame_duration(local, 10, rate, erp, 178 dur = ieee80211_frame_duration(local, 10, rate, erp,
179 tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); 179 tx->sdata->bss_conf.use_short_preamble);
180 180
181 if (next_frag_len) { 181 if (next_frag_len) {
182 /* Frame is fragmented: duration increases with time needed to 182 /* Frame is fragmented: duration increases with time needed to
@@ -185,8 +185,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
185 /* next fragment */ 185 /* next fragment */
186 dur += ieee80211_frame_duration(local, next_frag_len, 186 dur += ieee80211_frame_duration(local, next_frag_len,
187 txrate->rate, erp, 187 txrate->rate, erp,
188 tx->sdata->flags & 188 tx->sdata->bss_conf.use_short_preamble);
189 IEEE80211_SDATA_SHORT_PREAMBLE);
190 } 189 }
191 190
192 return dur; 191 return dur;
@@ -225,7 +224,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
225 if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) 224 if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED))
226 return TXRX_CONTINUE; 225 return TXRX_CONTINUE;
227 226
228 if (unlikely(tx->local->sta_scanning != 0) && 227 if (unlikely(tx->local->sta_sw_scanning) &&
229 ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || 228 ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
230 (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) 229 (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ))
231 return TXRX_DROP; 230 return TXRX_DROP;
@@ -237,7 +236,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
237 236
238 if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) { 237 if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) {
239 if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && 238 if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
240 tx->sdata->type != IEEE80211_IF_TYPE_IBSS && 239 tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
241 (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { 240 (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) {
242#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 241#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
243 DECLARE_MAC_BUF(mac); 242 DECLARE_MAC_BUF(mac);
@@ -251,7 +250,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
251 } else { 250 } else {
252 if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && 251 if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
253 tx->local->num_sta == 0 && 252 tx->local->num_sta == 0 &&
254 tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { 253 tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) {
255 /* 254 /*
256 * No associated STAs - no need to send multicast 255 * No associated STAs - no need to send multicast
257 * frames. 256 * frames.
@@ -261,18 +260,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
261 return TXRX_CONTINUE; 260 return TXRX_CONTINUE;
262 } 261 }
263 262
264 if (unlikely(/* !injected && */ tx->sdata->ieee802_1x &&
265 !(sta_flags & WLAN_STA_AUTHORIZED))) {
266#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
267 DECLARE_MAC_BUF(mac);
268 printk(KERN_DEBUG "%s: dropped frame to %s"
269 " (unauthorized port)\n", tx->dev->name,
270 print_mac(mac, hdr->addr1));
271#endif
272 I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port);
273 return TXRX_DROP;
274 }
275
276 return TXRX_CONTINUE; 263 return TXRX_CONTINUE;
277} 264}
278 265
@@ -306,7 +293,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
306 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 293 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
307 struct ieee80211_if_ap *ap; 294 struct ieee80211_if_ap *ap;
308 if (sdata->dev == local->mdev || 295 if (sdata->dev == local->mdev ||
309 sdata->type != IEEE80211_IF_TYPE_AP) 296 sdata->vif.type != IEEE80211_IF_TYPE_AP)
310 continue; 297 continue;
311 ap = &sdata->u.ap; 298 ap = &sdata->u.ap;
312 skb = skb_dequeue(&ap->ps_bc_buf); 299 skb = skb_dequeue(&ap->ps_bc_buf);
@@ -334,16 +321,27 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
334 wiphy_name(local->hw.wiphy), purged); 321 wiphy_name(local->hw.wiphy), purged);
335} 322}
336 323
337static inline ieee80211_txrx_result 324static ieee80211_txrx_result
338ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) 325ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
339{ 326{
340 /* broadcast/multicast frame */ 327 /*
341 /* If any of the associated stations is in power save mode, 328 * broadcast/multicast frame
342 * the frame is buffered to be sent after DTIM beacon frame */ 329 *
343 if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && 330 * If any of the associated stations is in power save mode,
344 tx->sdata->type != IEEE80211_IF_TYPE_WDS && 331 * the frame is buffered to be sent after DTIM beacon frame.
345 tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && 332 * This is done either by the hardware or us.
346 !(tx->fc & IEEE80211_FCTL_ORDER)) { 333 */
334
335 /* not AP/IBSS or ordered frame */
336 if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER))
337 return TXRX_CONTINUE;
338
339 /* no stations in PS mode */
340 if (!atomic_read(&tx->sdata->bss->num_sta_ps))
341 return TXRX_CONTINUE;
342
343 /* buffered in mac80211 */
344 if (tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) {
347 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) 345 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
348 purge_old_ps_buffers(tx->local); 346 purge_old_ps_buffers(tx->local);
349 if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= 347 if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >=
@@ -360,10 +358,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
360 return TXRX_QUEUED; 358 return TXRX_QUEUED;
361 } 359 }
362 360
361 /* buffered in hardware */
362 tx->u.tx.control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM;
363
363 return TXRX_CONTINUE; 364 return TXRX_CONTINUE;
364} 365}
365 366
366static inline ieee80211_txrx_result 367static ieee80211_txrx_result
367ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) 368ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
368{ 369{
369 struct sta_info *sta = tx->sta; 370 struct sta_info *sta = tx->sta;
@@ -420,7 +421,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
420 return TXRX_CONTINUE; 421 return TXRX_CONTINUE;
421} 422}
422 423
423
424static ieee80211_txrx_result 424static ieee80211_txrx_result
425ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) 425ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
426{ 426{
@@ -433,13 +433,11 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
433 return ieee80211_tx_h_multicast_ps_buf(tx); 433 return ieee80211_tx_h_multicast_ps_buf(tx);
434} 434}
435 435
436
437
438
439static ieee80211_txrx_result 436static ieee80211_txrx_result
440ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) 437ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
441{ 438{
442 struct ieee80211_key *key; 439 struct ieee80211_key *key;
440 u16 fc = tx->fc;
443 441
444 if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) 442 if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
445 tx->key = NULL; 443 tx->key = NULL;
@@ -448,19 +446,38 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
448 else if ((key = rcu_dereference(tx->sdata->default_key))) 446 else if ((key = rcu_dereference(tx->sdata->default_key)))
449 tx->key = key; 447 tx->key = key;
450 else if (tx->sdata->drop_unencrypted && 448 else if (tx->sdata->drop_unencrypted &&
451 !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { 449 !(tx->u.tx.control->flags & IEEE80211_TXCTL_EAPOL_FRAME) &&
450 !(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) {
452 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); 451 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
453 return TXRX_DROP; 452 return TXRX_DROP;
454 } else { 453 } else
455 tx->key = NULL; 454 tx->key = NULL;
456 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
457 }
458 455
459 if (tx->key) { 456 if (tx->key) {
457 u16 ftype, stype;
458
460 tx->key->tx_rx_count++; 459 tx->key->tx_rx_count++;
461 /* TODO: add threshold stuff again */ 460 /* TODO: add threshold stuff again */
461
462 switch (tx->key->conf.alg) {
463 case ALG_WEP:
464 ftype = fc & IEEE80211_FCTL_FTYPE;
465 stype = fc & IEEE80211_FCTL_STYPE;
466
467 if (ftype == IEEE80211_FTYPE_MGMT &&
468 stype == IEEE80211_STYPE_AUTH)
469 break;
470 case ALG_TKIP:
471 case ALG_CCMP:
472 if (!WLAN_FC_DATA_PRESENT(fc))
473 tx->key = NULL;
474 break;
475 }
462 } 476 }
463 477
478 if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
479 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
480
464 return TXRX_CONTINUE; 481 return TXRX_CONTINUE;
465} 482}
466 483
@@ -567,21 +584,17 @@ ieee80211_tx_h_encrypt(struct ieee80211_txrx_data *tx)
567static ieee80211_txrx_result 584static ieee80211_txrx_result
568ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) 585ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
569{ 586{
570 struct rate_control_extra extra; 587 struct rate_selection rsel;
571 588
572 if (likely(!tx->u.tx.rate)) { 589 if (likely(!tx->u.tx.rate)) {
573 memset(&extra, 0, sizeof(extra)); 590 rate_control_get_rate(tx->dev, tx->u.tx.mode, tx->skb, &rsel);
574 extra.mode = tx->u.tx.mode; 591 tx->u.tx.rate = rsel.rate;
575 extra.ethertype = tx->ethertype; 592 if (unlikely(rsel.probe != NULL)) {
576
577 tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev,
578 tx->skb, &extra);
579 if (unlikely(extra.probe != NULL)) {
580 tx->u.tx.control->flags |= 593 tx->u.tx.control->flags |=
581 IEEE80211_TXCTL_RATE_CTRL_PROBE; 594 IEEE80211_TXCTL_RATE_CTRL_PROBE;
582 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 595 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
583 tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; 596 tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val;
584 tx->u.tx.rate = extra.probe; 597 tx->u.tx.rate = rsel.probe;
585 } else 598 } else
586 tx->u.tx.control->alt_retry_rate = -1; 599 tx->u.tx.control->alt_retry_rate = -1;
587 600
@@ -591,15 +604,15 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
591 tx->u.tx.control->alt_retry_rate = -1; 604 tx->u.tx.control->alt_retry_rate = -1;
592 605
593 if (tx->u.tx.mode->mode == MODE_IEEE80211G && 606 if (tx->u.tx.mode->mode == MODE_IEEE80211G &&
594 (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && 607 tx->sdata->bss_conf.use_cts_prot &&
595 (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) { 608 (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && rsel.nonerp) {
596 tx->u.tx.last_frag_rate = tx->u.tx.rate; 609 tx->u.tx.last_frag_rate = tx->u.tx.rate;
597 if (extra.probe) 610 if (rsel.probe)
598 tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 611 tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
599 else 612 else
600 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 613 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
601 tx->u.tx.rate = extra.nonerp; 614 tx->u.tx.rate = rsel.nonerp;
602 tx->u.tx.control->rate = extra.nonerp; 615 tx->u.tx.control->rate = rsel.nonerp;
603 tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; 616 tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
604 } else { 617 } else {
605 tx->u.tx.last_frag_rate = tx->u.tx.rate; 618 tx->u.tx.last_frag_rate = tx->u.tx.rate;
@@ -653,7 +666,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
653 if (mode->mode == MODE_IEEE80211G && 666 if (mode->mode == MODE_IEEE80211G &&
654 (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && 667 (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) &&
655 (tx->flags & IEEE80211_TXRXD_TXUNICAST) && 668 (tx->flags & IEEE80211_TXRXD_TXUNICAST) &&
656 (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && 669 tx->sdata->bss_conf.use_cts_prot &&
657 !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) 670 !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
658 control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; 671 control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT;
659 672
@@ -662,7 +675,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
662 * available on the network at the current point in time. */ 675 * available on the network at the current point in time. */
663 if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && 676 if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
664 (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && 677 (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) &&
665 (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 678 tx->sdata->bss_conf.use_short_preamble &&
666 (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { 679 (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) {
667 tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; 680 tx->u.tx.control->tx_rate = tx->u.tx.rate->val2;
668 } 681 }
@@ -706,15 +719,6 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
706 } 719 }
707 } 720 }
708 721
709 /*
710 * Tell hardware to not encrypt when we had sw crypto.
711 * Because we use the same flag to internally indicate that
712 * no (software) encryption should be done, we have to set it
713 * after all crypto handlers.
714 */
715 if (tx->key && !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
716 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
717
718 return TXRX_CONTINUE; 722 return TXRX_CONTINUE;
719} 723}
720 724
@@ -927,7 +931,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
927 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 931 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
928 struct ieee80211_hdr *hdr; 932 struct ieee80211_hdr *hdr;
929 struct ieee80211_sub_if_data *sdata; 933 struct ieee80211_sub_if_data *sdata;
930 ieee80211_txrx_result res = TXRX_CONTINUE;
931 934
932 int hdrlen; 935 int hdrlen;
933 936
@@ -945,7 +948,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
945 948
946 /* process and remove the injection radiotap header */ 949 /* process and remove the injection radiotap header */
947 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 950 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
948 if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { 951 if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) {
949 if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP) 952 if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP)
950 return TXRX_DROP; 953 return TXRX_DROP;
951 954
@@ -992,12 +995,10 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
992 } 995 }
993 control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; 996 control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT;
994 997
995 return res; 998 return TXRX_CONTINUE;
996} 999}
997 1000
998/* Device in tx->dev has a reference added; use dev_put(tx->dev) when 1001/*
999 * finished with it.
1000 *
1001 * NB: @tx is uninitialised when passed in here 1002 * NB: @tx is uninitialised when passed in here
1002 */ 1003 */
1003static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, 1004static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
@@ -1018,6 +1019,7 @@ static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
1018 return -ENODEV; 1019 return -ENODEV;
1019 /* initialises tx with control */ 1020 /* initialises tx with control */
1020 __ieee80211_tx_prepare(tx, skb, dev, control); 1021 __ieee80211_tx_prepare(tx, skb, dev, control);
1022 dev_put(dev);
1021 return 0; 1023 return 0;
1022} 1024}
1023 1025
@@ -1248,14 +1250,16 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
1248 } 1250 }
1249 } 1251 }
1250 1252
1251 control.ifindex = odev->ifindex; 1253 control.vif = &osdata->vif;
1252 control.type = osdata->type; 1254 control.type = osdata->vif.type;
1253 if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) 1255 if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS)
1254 control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; 1256 control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
1255 if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) 1257 if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT)
1256 control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; 1258 control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
1257 if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) 1259 if (pkt_data->flags & IEEE80211_TXPD_REQUEUE)
1258 control.flags |= IEEE80211_TXCTL_REQUEUE; 1260 control.flags |= IEEE80211_TXCTL_REQUEUE;
1261 if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME)
1262 control.flags |= IEEE80211_TXCTL_EAPOL_FRAME;
1259 control.queue = pkt_data->queue; 1263 control.queue = pkt_data->queue;
1260 1264
1261 ret = ieee80211_tx(odev, skb, &control); 1265 ret = ieee80211_tx(odev, skb, &control);
@@ -1348,6 +1352,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1348 int encaps_len, skip_header_bytes; 1352 int encaps_len, skip_header_bytes;
1349 int nh_pos, h_pos; 1353 int nh_pos, h_pos;
1350 struct sta_info *sta; 1354 struct sta_info *sta;
1355 u32 sta_flags = 0;
1351 1356
1352 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1357 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1353 if (unlikely(skb->len < ETH_HLEN)) { 1358 if (unlikely(skb->len < ETH_HLEN)) {
@@ -1363,10 +1368,9 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1363 /* convert Ethernet header to proper 802.11 header (based on 1368 /* convert Ethernet header to proper 802.11 header (based on
1364 * operation mode) */ 1369 * operation mode) */
1365 ethertype = (skb->data[12] << 8) | skb->data[13]; 1370 ethertype = (skb->data[12] << 8) | skb->data[13];
1366 /* TODO: handling for 802.1x authorized/unauthorized port */
1367 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; 1371 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
1368 1372
1369 switch (sdata->type) { 1373 switch (sdata->vif.type) {
1370 case IEEE80211_IF_TYPE_AP: 1374 case IEEE80211_IF_TYPE_AP:
1371 case IEEE80211_IF_TYPE_VLAN: 1375 case IEEE80211_IF_TYPE_VLAN:
1372 fc |= IEEE80211_FCTL_FROMDS; 1376 fc |= IEEE80211_FCTL_FROMDS;
@@ -1405,16 +1409,42 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1405 goto fail; 1409 goto fail;
1406 } 1410 }
1407 1411
1408 /* receiver is QoS enabled, use a QoS type frame */
1409 sta = sta_info_get(local, hdr.addr1); 1412 sta = sta_info_get(local, hdr.addr1);
1410 if (sta) { 1413 if (sta) {
1411 if (sta->flags & WLAN_STA_WME) { 1414 sta_flags = sta->flags;
1412 fc |= IEEE80211_STYPE_QOS_DATA;
1413 hdrlen += 2;
1414 }
1415 sta_info_put(sta); 1415 sta_info_put(sta);
1416 } 1416 }
1417 1417
1418 /* receiver is QoS enabled, use a QoS type frame */
1419 if (sta_flags & WLAN_STA_WME) {
1420 fc |= IEEE80211_STYPE_QOS_DATA;
1421 hdrlen += 2;
1422 }
1423
1424 /*
1425 * If port access control is enabled, drop frames to unauthorised
1426 * stations unless they are EAPOL frames from the local station.
1427 */
1428 if (unlikely(sdata->ieee802_1x_pac &&
1429 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1430 !(ethertype == ETH_P_PAE &&
1431 compare_ether_addr(dev->dev_addr,
1432 skb->data + ETH_ALEN) == 0))) {
1433#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1434 DECLARE_MAC_BUF(mac);
1435
1436 if (net_ratelimit())
1437 printk(KERN_DEBUG "%s: dropped frame to %s"
1438 " (unauthorized port)\n", dev->name,
1439 print_mac(mac, hdr.addr1));
1440#endif
1441
1442 I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
1443
1444 ret = 0;
1445 goto fail;
1446 }
1447
1418 hdr.frame_control = cpu_to_le16(fc); 1448 hdr.frame_control = cpu_to_le16(fc);
1419 hdr.duration_id = 0; 1449 hdr.duration_id = 0;
1420 hdr.seq_ctrl = 0; 1450 hdr.seq_ctrl = 0;
@@ -1503,6 +1533,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1503 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; 1533 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
1504 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); 1534 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
1505 pkt_data->ifindex = dev->ifindex; 1535 pkt_data->ifindex = dev->ifindex;
1536 if (ethertype == ETH_P_PAE)
1537 pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
1506 1538
1507 skb->dev = local->mdev; 1539 skb->dev = local->mdev;
1508 dev->stats.tx_packets++; 1540 dev->stats.tx_packets++;
@@ -1527,64 +1559,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1527 return ret; 1559 return ret;
1528} 1560}
1529 1561
1530/*
1531 * This is the transmit routine for the 802.11 type interfaces
1532 * called by upper layers of the linux networking
1533 * stack when it has a frame to transmit
1534 */
1535int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev)
1536{
1537 struct ieee80211_sub_if_data *sdata;
1538 struct ieee80211_tx_packet_data *pkt_data;
1539 struct ieee80211_hdr *hdr;
1540 u16 fc;
1541
1542 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1543
1544 if (skb->len < 10) {
1545 dev_kfree_skb(skb);
1546 return 0;
1547 }
1548
1549 if (skb_headroom(skb) < sdata->local->tx_headroom) {
1550 if (pskb_expand_head(skb, sdata->local->tx_headroom,
1551 0, GFP_ATOMIC)) {
1552 dev_kfree_skb(skb);
1553 return 0;
1554 }
1555 }
1556
1557 hdr = (struct ieee80211_hdr *) skb->data;
1558 fc = le16_to_cpu(hdr->frame_control);
1559
1560 pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
1561 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
1562 pkt_data->ifindex = sdata->dev->ifindex;
1563
1564 skb->priority = 20; /* use hardcoded priority for mgmt TX queue */
1565 skb->dev = sdata->local->mdev;
1566
1567 /*
1568 * We're using the protocol field of the the frame control header
1569 * to request TX callback for hostapd. BIT(1) is checked.
1570 */
1571 if ((fc & BIT(1)) == BIT(1)) {
1572 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
1573 fc &= ~BIT(1);
1574 hdr->frame_control = cpu_to_le16(fc);
1575 }
1576
1577 if (!(fc & IEEE80211_FCTL_PROTECTED))
1578 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
1579
1580 dev->stats.tx_packets++;
1581 dev->stats.tx_bytes += skb->len;
1582
1583 dev_queue_xmit(skb);
1584
1585 return 0;
1586}
1587
1588/* helper functions for pending packets for when queues are stopped */ 1562/* helper functions for pending packets for when queues are stopped */
1589 1563
1590void ieee80211_clear_tx_pending(struct ieee80211_local *local) 1564void ieee80211_clear_tx_pending(struct ieee80211_local *local)
@@ -1653,7 +1627,8 @@ void ieee80211_tx_pending(unsigned long data)
1653 1627
1654static void ieee80211_beacon_add_tim(struct ieee80211_local *local, 1628static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1655 struct ieee80211_if_ap *bss, 1629 struct ieee80211_if_ap *bss,
1656 struct sk_buff *skb) 1630 struct sk_buff *skb,
1631 struct beacon_data *beacon)
1657{ 1632{
1658 u8 *pos, *tim; 1633 u8 *pos, *tim;
1659 int aid0 = 0; 1634 int aid0 = 0;
@@ -1669,7 +1644,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1669 IEEE80211_MAX_AID+1); 1644 IEEE80211_MAX_AID+1);
1670 1645
1671 if (bss->dtim_count == 0) 1646 if (bss->dtim_count == 0)
1672 bss->dtim_count = bss->dtim_period - 1; 1647 bss->dtim_count = beacon->dtim_period - 1;
1673 else 1648 else
1674 bss->dtim_count--; 1649 bss->dtim_count--;
1675 1650
@@ -1677,7 +1652,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1677 *pos++ = WLAN_EID_TIM; 1652 *pos++ = WLAN_EID_TIM;
1678 *pos++ = 4; 1653 *pos++ = 4;
1679 *pos++ = bss->dtim_count; 1654 *pos++ = bss->dtim_count;
1680 *pos++ = bss->dtim_period; 1655 *pos++ = beacon->dtim_period;
1681 1656
1682 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) 1657 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
1683 aid0 = 1; 1658 aid0 = 1;
@@ -1715,7 +1690,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1715 read_unlock_bh(&local->sta_lock); 1690 read_unlock_bh(&local->sta_lock);
1716} 1691}
1717 1692
1718struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, 1693struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1694 struct ieee80211_vif *vif,
1719 struct ieee80211_tx_control *control) 1695 struct ieee80211_tx_control *control)
1720{ 1696{
1721 struct ieee80211_local *local = hw_to_local(hw); 1697 struct ieee80211_local *local = hw_to_local(hw);
@@ -1723,68 +1699,64 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
1723 struct net_device *bdev; 1699 struct net_device *bdev;
1724 struct ieee80211_sub_if_data *sdata = NULL; 1700 struct ieee80211_sub_if_data *sdata = NULL;
1725 struct ieee80211_if_ap *ap = NULL; 1701 struct ieee80211_if_ap *ap = NULL;
1726 struct ieee80211_rate *rate; 1702 struct rate_selection rsel;
1727 struct rate_control_extra extra; 1703 struct beacon_data *beacon;
1728 u8 *b_head, *b_tail; 1704
1729 int bh_len, bt_len; 1705 rcu_read_lock();
1730
1731 bdev = dev_get_by_index(&init_net, if_id);
1732 if (bdev) {
1733 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
1734 ap = &sdata->u.ap;
1735 dev_put(bdev);
1736 }
1737 1706
1738 if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || 1707 sdata = vif_to_sdata(vif);
1739 !ap->beacon_head) { 1708 bdev = sdata->dev;
1709 ap = &sdata->u.ap;
1710
1711 beacon = rcu_dereference(ap->beacon);
1712
1713 if (!ap || sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon) {
1740#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1714#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1741 if (net_ratelimit()) 1715 if (net_ratelimit())
1742 printk(KERN_DEBUG "no beacon data avail for idx=%d " 1716 printk(KERN_DEBUG "no beacon data avail for %s\n",
1743 "(%s)\n", if_id, bdev ? bdev->name : "N/A"); 1717 bdev->name);
1744#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 1718#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
1745 return NULL; 1719 skb = NULL;
1720 goto out;
1746 } 1721 }
1747 1722
1748 /* Assume we are generating the normal beacon locally */ 1723 /* headroom, head length, tail length and maximum TIM length */
1749 b_head = ap->beacon_head; 1724 skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
1750 b_tail = ap->beacon_tail; 1725 beacon->tail_len + 256);
1751 bh_len = ap->beacon_head_len;
1752 bt_len = ap->beacon_tail_len;
1753
1754 skb = dev_alloc_skb(local->tx_headroom +
1755 bh_len + bt_len + 256 /* maximum TIM len */);
1756 if (!skb) 1726 if (!skb)
1757 return NULL; 1727 goto out;
1758 1728
1759 skb_reserve(skb, local->tx_headroom); 1729 skb_reserve(skb, local->tx_headroom);
1760 memcpy(skb_put(skb, bh_len), b_head, bh_len); 1730 memcpy(skb_put(skb, beacon->head_len), beacon->head,
1731 beacon->head_len);
1761 1732
1762 ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); 1733 ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data);
1763 1734
1764 ieee80211_beacon_add_tim(local, ap, skb); 1735 ieee80211_beacon_add_tim(local, ap, skb, beacon);
1765 1736
1766 if (b_tail) { 1737 if (beacon->tail)
1767 memcpy(skb_put(skb, bt_len), b_tail, bt_len); 1738 memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
1768 } 1739 beacon->tail_len);
1769 1740
1770 if (control) { 1741 if (control) {
1771 memset(&extra, 0, sizeof(extra)); 1742 rate_control_get_rate(local->mdev, local->oper_hw_mode, skb,
1772 extra.mode = local->oper_hw_mode; 1743 &rsel);
1773 1744 if (!rsel.rate) {
1774 rate = rate_control_get_rate(local, local->mdev, skb, &extra);
1775 if (!rate) {
1776 if (net_ratelimit()) { 1745 if (net_ratelimit()) {
1777 printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " 1746 printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
1778 "found\n", wiphy_name(local->hw.wiphy)); 1747 "no rate found\n",
1748 wiphy_name(local->hw.wiphy));
1779 } 1749 }
1780 dev_kfree_skb(skb); 1750 dev_kfree_skb(skb);
1781 return NULL; 1751 skb = NULL;
1752 goto out;
1782 } 1753 }
1783 1754
1755 control->vif = vif;
1784 control->tx_rate = 1756 control->tx_rate =
1785 ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 1757 (sdata->bss_conf.use_short_preamble &&
1786 (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 1758 (rsel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
1787 rate->val2 : rate->val; 1759 rsel.rate->val2 : rsel.rate->val;
1788 control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; 1760 control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
1789 control->power_level = local->hw.conf.power_level; 1761 control->power_level = local->hw.conf.power_level;
1790 control->flags |= IEEE80211_TXCTL_NO_ACK; 1762 control->flags |= IEEE80211_TXCTL_NO_ACK;
@@ -1793,11 +1765,14 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
1793 } 1765 }
1794 1766
1795 ap->num_beacons++; 1767 ap->num_beacons++;
1768
1769 out:
1770 rcu_read_unlock();
1796 return skb; 1771 return skb;
1797} 1772}
1798EXPORT_SYMBOL(ieee80211_beacon_get); 1773EXPORT_SYMBOL(ieee80211_beacon_get);
1799 1774
1800void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, 1775void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
1801 const void *frame, size_t frame_len, 1776 const void *frame, size_t frame_len,
1802 const struct ieee80211_tx_control *frame_txctl, 1777 const struct ieee80211_tx_control *frame_txctl,
1803 struct ieee80211_rts *rts) 1778 struct ieee80211_rts *rts)
@@ -1807,13 +1782,14 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id,
1807 1782
1808 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; 1783 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS;
1809 rts->frame_control = cpu_to_le16(fctl); 1784 rts->frame_control = cpu_to_le16(fctl);
1810 rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl); 1785 rts->duration = ieee80211_rts_duration(hw, vif, frame_len,
1786 frame_txctl);
1811 memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); 1787 memcpy(rts->ra, hdr->addr1, sizeof(rts->ra));
1812 memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); 1788 memcpy(rts->ta, hdr->addr2, sizeof(rts->ta));
1813} 1789}
1814EXPORT_SYMBOL(ieee80211_rts_get); 1790EXPORT_SYMBOL(ieee80211_rts_get);
1815 1791
1816void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, 1792void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
1817 const void *frame, size_t frame_len, 1793 const void *frame, size_t frame_len,
1818 const struct ieee80211_tx_control *frame_txctl, 1794 const struct ieee80211_tx_control *frame_txctl,
1819 struct ieee80211_cts *cts) 1795 struct ieee80211_cts *cts)
@@ -1823,13 +1799,15 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id,
1823 1799
1824 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; 1800 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS;
1825 cts->frame_control = cpu_to_le16(fctl); 1801 cts->frame_control = cpu_to_le16(fctl);
1826 cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl); 1802 cts->duration = ieee80211_ctstoself_duration(hw, vif,
1803 frame_len, frame_txctl);
1827 memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); 1804 memcpy(cts->ra, hdr->addr1, sizeof(cts->ra));
1828} 1805}
1829EXPORT_SYMBOL(ieee80211_ctstoself_get); 1806EXPORT_SYMBOL(ieee80211_ctstoself_get);
1830 1807
1831struct sk_buff * 1808struct sk_buff *
1832ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, 1809ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
1810 struct ieee80211_vif *vif,
1833 struct ieee80211_tx_control *control) 1811 struct ieee80211_tx_control *control)
1834{ 1812{
1835 struct ieee80211_local *local = hw_to_local(hw); 1813 struct ieee80211_local *local = hw_to_local(hw);
@@ -1841,16 +1819,25 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
1841 struct net_device *bdev; 1819 struct net_device *bdev;
1842 struct ieee80211_sub_if_data *sdata; 1820 struct ieee80211_sub_if_data *sdata;
1843 struct ieee80211_if_ap *bss = NULL; 1821 struct ieee80211_if_ap *bss = NULL;
1822 struct beacon_data *beacon;
1844 1823
1845 bdev = dev_get_by_index(&init_net, if_id); 1824 sdata = vif_to_sdata(vif);
1846 if (bdev) { 1825 bdev = sdata->dev;
1847 sdata = IEEE80211_DEV_TO_SUB_IF(bdev); 1826
1848 bss = &sdata->u.ap; 1827
1849 dev_put(bdev); 1828 if (!bss)
1850 }
1851 if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head)
1852 return NULL; 1829 return NULL;
1853 1830
1831 rcu_read_lock();
1832 beacon = rcu_dereference(bss->beacon);
1833
1834 if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon ||
1835 !beacon->head) {
1836 rcu_read_unlock();
1837 return NULL;
1838 }
1839 rcu_read_unlock();
1840
1854 if (bss->dtim_count != 0) 1841 if (bss->dtim_count != 0)
1855 return NULL; /* send buffered bc/mc only after DTIM beacon */ 1842 return NULL; /* send buffered bc/mc only after DTIM beacon */
1856 memset(control, 0, sizeof(*control)); 1843 memset(control, 0, sizeof(*control));
@@ -1883,7 +1870,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
1883 if (res == TXRX_DROP || res == TXRX_QUEUED) 1870 if (res == TXRX_DROP || res == TXRX_QUEUED)
1884 break; 1871 break;
1885 } 1872 }
1886 dev_put(tx.dev);
1887 skb = tx.skb; /* handlers are allowed to change skb */ 1873 skb = tx.skb; /* handlers are allowed to change skb */
1888 1874
1889 if (res == TXRX_DROP) { 1875 if (res == TXRX_DROP) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5a0564e1dbd6..5e631ce98d7e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -22,6 +22,7 @@
22#include <linux/bitmap.h> 22#include <linux/bitmap.h>
23#include <net/net_namespace.h> 23#include <net/net_namespace.h>
24#include <net/cfg80211.h> 24#include <net/cfg80211.h>
25#include <net/rtnetlink.h>
25 26
26#include "ieee80211_i.h" 27#include "ieee80211_i.h"
27#include "ieee80211_rate.h" 28#include "ieee80211_rate.h"
@@ -39,10 +40,6 @@ const unsigned char rfc1042_header[] =
39const unsigned char bridge_tunnel_header[] = 40const unsigned char bridge_tunnel_header[] =
40 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 41 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
41 42
42/* No encapsulation header if EtherType < 0x600 (=length) */
43static const unsigned char eapol_header[] =
44 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e };
45
46 43
47static int rate_list_match(const int *rate_list, int rate) 44static int rate_list_match(const int *rate_list, int rate)
48{ 45{
@@ -130,17 +127,21 @@ void ieee80211_prepare_rates(struct ieee80211_local *local,
130 } 127 }
131} 128}
132 129
133u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) 130u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
131 enum ieee80211_if_types type)
134{ 132{
135 u16 fc; 133 u16 fc;
136 134
137 if (len < 24) 135 /* drop ACK/CTS frames and incorrect hdr len (ctrl) */
136 if (len < 16)
138 return NULL; 137 return NULL;
139 138
140 fc = le16_to_cpu(hdr->frame_control); 139 fc = le16_to_cpu(hdr->frame_control);
141 140
142 switch (fc & IEEE80211_FCTL_FTYPE) { 141 switch (fc & IEEE80211_FCTL_FTYPE) {
143 case IEEE80211_FTYPE_DATA: 142 case IEEE80211_FTYPE_DATA:
143 if (len < 24) /* drop incorrect hdr len (data) */
144 return NULL;
144 switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 145 switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
145 case IEEE80211_FCTL_TODS: 146 case IEEE80211_FCTL_TODS:
146 return hdr->addr1; 147 return hdr->addr1;
@@ -153,10 +154,24 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
153 } 154 }
154 break; 155 break;
155 case IEEE80211_FTYPE_MGMT: 156 case IEEE80211_FTYPE_MGMT:
157 if (len < 24) /* drop incorrect hdr len (mgmt) */
158 return NULL;
156 return hdr->addr3; 159 return hdr->addr3;
157 case IEEE80211_FTYPE_CTL: 160 case IEEE80211_FTYPE_CTL:
158 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) 161 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
159 return hdr->addr1; 162 return hdr->addr1;
163 else if ((fc & IEEE80211_FCTL_STYPE) ==
164 IEEE80211_STYPE_BACK_REQ) {
165 switch (type) {
166 case IEEE80211_IF_TYPE_STA:
167 return hdr->addr2;
168 case IEEE80211_IF_TYPE_AP:
169 case IEEE80211_IF_TYPE_VLAN:
170 return hdr->addr1;
171 default:
172 return NULL;
173 }
174 }
160 else 175 else
161 return NULL; 176 return NULL;
162 } 177 }
@@ -217,31 +232,6 @@ int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
217} 232}
218EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); 233EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
219 234
220int ieee80211_is_eapol(const struct sk_buff *skb)
221{
222 const struct ieee80211_hdr *hdr;
223 u16 fc;
224 int hdrlen;
225
226 if (unlikely(skb->len < 10))
227 return 0;
228
229 hdr = (const struct ieee80211_hdr *) skb->data;
230 fc = le16_to_cpu(hdr->frame_control);
231
232 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
233 return 0;
234
235 hdrlen = ieee80211_get_hdrlen(fc);
236
237 if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) &&
238 memcmp(skb->data + hdrlen, eapol_header,
239 sizeof(eapol_header)) == 0))
240 return 1;
241
242 return 0;
243}
244
245void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) 235void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx)
246{ 236{
247 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; 237 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
@@ -312,45 +302,35 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
312} 302}
313 303
314/* Exported duration function for driver use */ 304/* Exported duration function for driver use */
315__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, 305__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
306 struct ieee80211_vif *vif,
316 size_t frame_len, int rate) 307 size_t frame_len, int rate)
317{ 308{
318 struct ieee80211_local *local = hw_to_local(hw); 309 struct ieee80211_local *local = hw_to_local(hw);
319 struct net_device *bdev = dev_get_by_index(&init_net, if_id); 310 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
320 struct ieee80211_sub_if_data *sdata;
321 u16 dur; 311 u16 dur;
322 int erp; 312 int erp;
323 313
324 if (unlikely(!bdev))
325 return 0;
326
327 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
328 erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); 314 erp = ieee80211_is_erp_rate(hw->conf.phymode, rate);
329 dur = ieee80211_frame_duration(local, frame_len, rate, 315 dur = ieee80211_frame_duration(local, frame_len, rate, erp,
330 erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); 316 sdata->bss_conf.use_short_preamble);
331 317
332 dev_put(bdev);
333 return cpu_to_le16(dur); 318 return cpu_to_le16(dur);
334} 319}
335EXPORT_SYMBOL(ieee80211_generic_frame_duration); 320EXPORT_SYMBOL(ieee80211_generic_frame_duration);
336 321
337__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, 322__le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
338 size_t frame_len, 323 struct ieee80211_vif *vif, size_t frame_len,
339 const struct ieee80211_tx_control *frame_txctl) 324 const struct ieee80211_tx_control *frame_txctl)
340{ 325{
341 struct ieee80211_local *local = hw_to_local(hw); 326 struct ieee80211_local *local = hw_to_local(hw);
342 struct ieee80211_rate *rate; 327 struct ieee80211_rate *rate;
343 struct net_device *bdev = dev_get_by_index(&init_net, if_id); 328 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
344 struct ieee80211_sub_if_data *sdata; 329 bool short_preamble;
345 int short_preamble;
346 int erp; 330 int erp;
347 u16 dur; 331 u16 dur;
348 332
349 if (unlikely(!bdev)) 333 short_preamble = sdata->bss_conf.use_short_preamble;
350 return 0;
351
352 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
353 short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
354 334
355 rate = frame_txctl->rts_rate; 335 rate = frame_txctl->rts_rate;
356 erp = !!(rate->flags & IEEE80211_RATE_ERP); 336 erp = !!(rate->flags & IEEE80211_RATE_ERP);
@@ -365,28 +345,23 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id,
365 dur += ieee80211_frame_duration(local, 10, rate->rate, 345 dur += ieee80211_frame_duration(local, 10, rate->rate,
366 erp, short_preamble); 346 erp, short_preamble);
367 347
368 dev_put(bdev);
369 return cpu_to_le16(dur); 348 return cpu_to_le16(dur);
370} 349}
371EXPORT_SYMBOL(ieee80211_rts_duration); 350EXPORT_SYMBOL(ieee80211_rts_duration);
372 351
373__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id, 352__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
353 struct ieee80211_vif *vif,
374 size_t frame_len, 354 size_t frame_len,
375 const struct ieee80211_tx_control *frame_txctl) 355 const struct ieee80211_tx_control *frame_txctl)
376{ 356{
377 struct ieee80211_local *local = hw_to_local(hw); 357 struct ieee80211_local *local = hw_to_local(hw);
378 struct ieee80211_rate *rate; 358 struct ieee80211_rate *rate;
379 struct net_device *bdev = dev_get_by_index(&init_net, if_id); 359 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
380 struct ieee80211_sub_if_data *sdata; 360 bool short_preamble;
381 int short_preamble;
382 int erp; 361 int erp;
383 u16 dur; 362 u16 dur;
384 363
385 if (unlikely(!bdev)) 364 short_preamble = sdata->bss_conf.use_short_preamble;
386 return 0;
387
388 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
389 short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
390 365
391 rate = frame_txctl->rts_rate; 366 rate = frame_txctl->rts_rate;
392 erp = !!(rate->flags & IEEE80211_RATE_ERP); 367 erp = !!(rate->flags & IEEE80211_RATE_ERP);
@@ -400,7 +375,6 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id,
400 erp, short_preamble); 375 erp, short_preamble);
401 } 376 }
402 377
403 dev_put(bdev);
404 return cpu_to_le16(dur); 378 return cpu_to_le16(dur);
405} 379}
406EXPORT_SYMBOL(ieee80211_ctstoself_duration); 380EXPORT_SYMBOL(ieee80211_ctstoself_duration);
@@ -484,3 +458,37 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
484 ieee80211_wake_queue(hw, i); 458 ieee80211_wake_queue(hw, i);
485} 459}
486EXPORT_SYMBOL(ieee80211_wake_queues); 460EXPORT_SYMBOL(ieee80211_wake_queues);
461
462void ieee80211_iterate_active_interfaces(
463 struct ieee80211_hw *hw,
464 void (*iterator)(void *data, u8 *mac,
465 struct ieee80211_vif *vif),
466 void *data)
467{
468 struct ieee80211_local *local = hw_to_local(hw);
469 struct ieee80211_sub_if_data *sdata;
470
471 rcu_read_lock();
472
473 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
474 switch (sdata->vif.type) {
475 case IEEE80211_IF_TYPE_INVALID:
476 case IEEE80211_IF_TYPE_MNTR:
477 case IEEE80211_IF_TYPE_VLAN:
478 continue;
479 case IEEE80211_IF_TYPE_AP:
480 case IEEE80211_IF_TYPE_STA:
481 case IEEE80211_IF_TYPE_IBSS:
482 case IEEE80211_IF_TYPE_WDS:
483 break;
484 }
485 if (sdata->dev == local->mdev)
486 continue;
487 if (netif_running(sdata->dev))
488 iterator(data, sdata->dev->dev_addr,
489 &sdata->vif);
490 }
491
492 rcu_read_unlock();
493}
494EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index a84a23310ff4..a0cff72a580b 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -265,7 +265,8 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
265 if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, 265 if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
266 skb->data + hdrlen + WEP_IV_LEN, 266 skb->data + hdrlen + WEP_IV_LEN,
267 len)) { 267 len)) {
268 printk(KERN_DEBUG "WEP decrypt failed (ICV)\n"); 268 if (net_ratelimit())
269 printk(KERN_DEBUG "WEP decrypt failed (ICV)\n");
269 ret = -1; 270 ret = -1;
270 } 271 }
271 272
@@ -314,9 +315,11 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx)
314 315
315 if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { 316 if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) {
316 if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { 317 if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) {
318#ifdef CONFIG_MAC80211_DEBUG
317 if (net_ratelimit()) 319 if (net_ratelimit())
318 printk(KERN_DEBUG "%s: RX WEP frame, decrypt " 320 printk(KERN_DEBUG "%s: RX WEP frame, decrypt "
319 "failed\n", rx->dev->name); 321 "failed\n", rx->dev->name);
322#endif /* CONFIG_MAC80211_DEBUG */
320 return TXRX_DROP; 323 return TXRX_DROP;
321 } 324 }
322 } else if (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) { 325 } else if (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) {
@@ -346,16 +349,6 @@ static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb)
346ieee80211_txrx_result 349ieee80211_txrx_result
347ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx) 350ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx)
348{ 351{
349 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
350 u16 fc;
351
352 fc = le16_to_cpu(hdr->frame_control);
353
354 if (((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
355 ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
356 (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)))
357 return TXRX_CONTINUE;
358
359 tx->u.tx.control->iv_len = WEP_IV_LEN; 352 tx->u.tx.control->iv_len = WEP_IV_LEN;
360 tx->u.tx.control->icv_len = WEP_ICV_LEN; 353 tx->u.tx.control->icv_len = WEP_ICV_LEN;
361 ieee80211_tx_set_iswep(tx); 354 ieee80211_tx_set_iswep(tx);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5b8a157975a3..4e236599dd31 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -28,6 +28,7 @@ struct ieee80211_sched_data
28 struct sk_buff_head requeued[TC_80211_MAX_QUEUES]; 28 struct sk_buff_head requeued[TC_80211_MAX_QUEUES];
29}; 29};
30 30
31static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
31 32
32/* given a data frame determine the 802.1p/1d tag to use */ 33/* given a data frame determine the 802.1p/1d tag to use */
33static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd) 34static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
@@ -54,12 +55,12 @@ static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
54 return skb->priority - 256; 55 return skb->priority - 256;
55 56
56 /* check there is a valid IP header present */ 57 /* check there is a valid IP header present */
57 offset = ieee80211_get_hdrlen_from_skb(skb) + 8 /* LLC + proto */; 58 offset = ieee80211_get_hdrlen_from_skb(skb);
58 if (skb->protocol != __constant_htons(ETH_P_IP) || 59 if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) ||
59 skb->len < offset + sizeof(*ip)) 60 memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr)))
60 return 0; 61 return 0;
61 62
62 ip = (struct iphdr *) (skb->data + offset); 63 ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr));
63 64
64 dscp = ip->tos & 0xfc; 65 dscp = ip->tos & 0xfc;
65 if (dscp & 0x1c) 66 if (dscp & 0x1c)
@@ -296,16 +297,16 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
296 297
297 298
298/* called whenever parameters are updated on existing qdisc */ 299/* called whenever parameters are updated on existing qdisc */
299static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt) 300static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
300{ 301{
301/* struct ieee80211_sched_data *q = qdisc_priv(qd); 302/* struct ieee80211_sched_data *q = qdisc_priv(qd);
302*/ 303*/
303 /* check our options block is the right size */ 304 /* check our options block is the right size */
304 /* copy any options to our local structure */ 305 /* copy any options to our local structure */
305/* Ignore options block for now - always use static mapping 306/* Ignore options block for now - always use static mapping
306 struct tc_ieee80211_qopt *qopt = RTA_DATA(opt); 307 struct tc_ieee80211_qopt *qopt = nla_data(opt);
307 308
308 if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) 309 if (opt->nla_len < nla_attr_size(sizeof(*qopt)))
309 return -EINVAL; 310 return -EINVAL;
310 memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue)); 311 memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue));
311*/ 312*/
@@ -314,7 +315,7 @@ static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
314 315
315 316
316/* called during initial creation of qdisc on device */ 317/* called during initial creation of qdisc on device */
317static int wme_qdiscop_init(struct Qdisc *qd, struct rtattr *opt) 318static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
318{ 319{
319 struct ieee80211_sched_data *q = qdisc_priv(qd); 320 struct ieee80211_sched_data *q = qdisc_priv(qd);
320 struct net_device *dev = qd->dev; 321 struct net_device *dev = qd->dev;
@@ -369,10 +370,10 @@ static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
369 struct tc_ieee80211_qopt opt; 370 struct tc_ieee80211_qopt opt;
370 371
371 memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1); 372 memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1);
372 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 373 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
373*/ return skb->len; 374*/ return skb->len;
374/* 375/*
375rtattr_failure: 376nla_put_failure:
376 skb_trim(skb, p - skb->data);*/ 377 skb_trim(skb, p - skb->data);*/
377 return -1; 378 return -1;
378} 379}
@@ -443,7 +444,7 @@ static void wme_classop_put(struct Qdisc *q, unsigned long cl)
443 444
444 445
445static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent, 446static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
446 struct rtattr **tca, unsigned long *arg) 447 struct nlattr **tca, unsigned long *arg)
447{ 448{
448 unsigned long cl = *arg; 449 unsigned long cl = *arg;
449 struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); 450 struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
@@ -527,7 +528,7 @@ static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd,
527 528
528/* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached) 529/* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached)
529 * - these are the operations on the classes */ 530 * - these are the operations on the classes */
530static struct Qdisc_class_ops class_ops = 531static const struct Qdisc_class_ops class_ops =
531{ 532{
532 .graft = wme_classop_graft, 533 .graft = wme_classop_graft,
533 .leaf = wme_classop_leaf, 534 .leaf = wme_classop_leaf,
@@ -547,7 +548,7 @@ static struct Qdisc_class_ops class_ops =
547 548
548 549
549/* queueing discipline operations */ 550/* queueing discipline operations */
550static struct Qdisc_ops wme_qdisc_ops = 551static struct Qdisc_ops wme_qdisc_ops __read_mostly =
551{ 552{
552 .next = NULL, 553 .next = NULL,
553 .cl_ops = &class_ops, 554 .cl_ops = &class_ops,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 6695efba57ec..6f04311cf0a0 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -245,16 +245,9 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx,
245ieee80211_txrx_result 245ieee80211_txrx_result
246ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx) 246ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx)
247{ 247{
248 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
249 u16 fc;
250 struct sk_buff *skb = tx->skb; 248 struct sk_buff *skb = tx->skb;
251 int wpa_test = 0, test = 0; 249 int wpa_test = 0, test = 0;
252 250
253 fc = le16_to_cpu(hdr->frame_control);
254
255 if (!WLAN_FC_DATA_PRESENT(fc))
256 return TXRX_CONTINUE;
257
258 tx->u.tx.control->icv_len = TKIP_ICV_LEN; 251 tx->u.tx.control->icv_len = TKIP_ICV_LEN;
259 tx->u.tx.control->iv_len = TKIP_IV_LEN; 252 tx->u.tx.control->iv_len = TKIP_IV_LEN;
260 ieee80211_tx_set_iswep(tx); 253 ieee80211_tx_set_iswep(tx);
@@ -323,9 +316,12 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx)
323 &rx->u.rx.tkip_iv32, 316 &rx->u.rx.tkip_iv32,
324 &rx->u.rx.tkip_iv16); 317 &rx->u.rx.tkip_iv16);
325 if (res != TKIP_DECRYPT_OK || wpa_test) { 318 if (res != TKIP_DECRYPT_OK || wpa_test) {
326 printk(KERN_DEBUG "%s: TKIP decrypt failed for RX frame from " 319#ifdef CONFIG_MAC80211_DEBUG
327 "%s (res=%d)\n", 320 if (net_ratelimit())
328 rx->dev->name, print_mac(mac, rx->sta->addr), res); 321 printk(KERN_DEBUG "%s: TKIP decrypt failed for RX "
322 "frame from %s (res=%d)\n", rx->dev->name,
323 print_mac(mac, rx->sta->addr), res);
324#endif /* CONFIG_MAC80211_DEBUG */
329 return TXRX_DROP; 325 return TXRX_DROP;
330 } 326 }
331 327
@@ -498,16 +494,9 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx,
498ieee80211_txrx_result 494ieee80211_txrx_result
499ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx) 495ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx)
500{ 496{
501 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
502 u16 fc;
503 struct sk_buff *skb = tx->skb; 497 struct sk_buff *skb = tx->skb;
504 int test = 0; 498 int test = 0;
505 499
506 fc = le16_to_cpu(hdr->frame_control);
507
508 if (!WLAN_FC_DATA_PRESENT(fc))
509 return TXRX_CONTINUE;
510
511 tx->u.tx.control->icv_len = CCMP_MIC_LEN; 500 tx->u.tx.control->icv_len = CCMP_MIC_LEN;
512 tx->u.tx.control->iv_len = CCMP_HDR_LEN; 501 tx->u.tx.control->iv_len = CCMP_HDR_LEN;
513 ieee80211_tx_set_iswep(tx); 502 ieee80211_tx_set_iswep(tx);
@@ -594,9 +583,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx)
594 skb->data + hdrlen + CCMP_HDR_LEN, data_len, 583 skb->data + hdrlen + CCMP_HDR_LEN, data_len,
595 skb->data + skb->len - CCMP_MIC_LEN, 584 skb->data + skb->len - CCMP_MIC_LEN,
596 skb->data + hdrlen + CCMP_HDR_LEN)) { 585 skb->data + hdrlen + CCMP_HDR_LEN)) {
597 printk(KERN_DEBUG "%s: CCMP decrypt failed for RX " 586#ifdef CONFIG_MAC80211_DEBUG
598 "frame from %s\n", rx->dev->name, 587 if (net_ratelimit())
599 print_mac(mac, rx->sta->addr)); 588 printk(KERN_DEBUG "%s: CCMP decrypt failed "
589 "for RX frame from %s\n", rx->dev->name,
590 print_mac(mac, rx->sta->addr));
591#endif /* CONFIG_MAC80211_DEBUG */
600 return TXRX_DROP; 592 return TXRX_DROP;
601 } 593 }
602 } 594 }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d7a600a5720a..daf5b881064d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -2,21 +2,20 @@ menu "Core Netfilter Configuration"
2 depends on NET && INET && NETFILTER 2 depends on NET && INET && NETFILTER
3 3
4config NETFILTER_NETLINK 4config NETFILTER_NETLINK
5 tristate "Netfilter netlink interface" 5 tristate
6 help
7 If this option is enabled, the kernel will include support
8 for the new netfilter netlink interface.
9 6
10config NETFILTER_NETLINK_QUEUE 7config NETFILTER_NETLINK_QUEUE
11 tristate "Netfilter NFQUEUE over NFNETLINK interface" 8 tristate "Netfilter NFQUEUE over NFNETLINK interface"
12 depends on NETFILTER_NETLINK 9 depends on NETFILTER_ADVANCED
10 select NETFILTER_NETLINK
13 help 11 help
14 If this option is enabled, the kernel will include support 12 If this option is enabled, the kernel will include support
15 for queueing packets via NFNETLINK. 13 for queueing packets via NFNETLINK.
16 14
17config NETFILTER_NETLINK_LOG 15config NETFILTER_NETLINK_LOG
18 tristate "Netfilter LOG over NFNETLINK interface" 16 tristate "Netfilter LOG over NFNETLINK interface"
19 depends on NETFILTER_NETLINK 17 default m if NETFILTER_ADVANCED=n
18 select NETFILTER_NETLINK
20 help 19 help
21 If this option is enabled, the kernel will include support 20 If this option is enabled, the kernel will include support
22 for logging packets via NFNETLINK. 21 for logging packets via NFNETLINK.
@@ -25,9 +24,9 @@ config NETFILTER_NETLINK_LOG
25 and is also scheduled to replace the old syslog-based ipt_LOG 24 and is also scheduled to replace the old syslog-based ipt_LOG
26 and ip6t_LOG modules. 25 and ip6t_LOG modules.
27 26
28# Rename this to NF_CONNTRACK in a 2.6.25 27config NF_CONNTRACK
29config NF_CONNTRACK_ENABLED
30 tristate "Netfilter connection tracking support" 28 tristate "Netfilter connection tracking support"
29 default m if NETFILTER_ADVANCED=n
31 help 30 help
32 Connection tracking keeps a record of what packets have passed 31 Connection tracking keeps a record of what packets have passed
33 through your machine, in order to figure out how they are related 32 through your machine, in order to figure out how they are related
@@ -40,12 +39,9 @@ config NF_CONNTRACK_ENABLED
40 39
41 To compile it as a module, choose M here. If unsure, say N. 40 To compile it as a module, choose M here. If unsure, say N.
42 41
43config NF_CONNTRACK
44 tristate
45 default NF_CONNTRACK_ENABLED
46
47config NF_CT_ACCT 42config NF_CT_ACCT
48 bool "Connection tracking flow accounting" 43 bool "Connection tracking flow accounting"
44 depends on NETFILTER_ADVANCED
49 depends on NF_CONNTRACK 45 depends on NF_CONNTRACK
50 help 46 help
51 If this option is enabled, the connection tracking code will 47 If this option is enabled, the connection tracking code will
@@ -58,6 +54,7 @@ config NF_CT_ACCT
58 54
59config NF_CONNTRACK_MARK 55config NF_CONNTRACK_MARK
60 bool 'Connection mark tracking support' 56 bool 'Connection mark tracking support'
57 depends on NETFILTER_ADVANCED
61 depends on NF_CONNTRACK 58 depends on NF_CONNTRACK
62 help 59 help
63 This option enables support for connection marks, used by the 60 This option enables support for connection marks, used by the
@@ -68,6 +65,7 @@ config NF_CONNTRACK_MARK
68config NF_CONNTRACK_SECMARK 65config NF_CONNTRACK_SECMARK
69 bool 'Connection tracking security mark support' 66 bool 'Connection tracking security mark support'
70 depends on NF_CONNTRACK && NETWORK_SECMARK 67 depends on NF_CONNTRACK && NETWORK_SECMARK
68 default m if NETFILTER_ADVANCED=n
71 help 69 help
72 This option enables security markings to be applied to 70 This option enables security markings to be applied to
73 connections. Typically they are copied to connections from 71 connections. Typically they are copied to connections from
@@ -78,8 +76,9 @@ config NF_CONNTRACK_SECMARK
78 If unsure, say 'N'. 76 If unsure, say 'N'.
79 77
80config NF_CONNTRACK_EVENTS 78config NF_CONNTRACK_EVENTS
81 bool "Connection tracking events (EXPERIMENTAL)" 79 bool "Connection tracking events"
82 depends on EXPERIMENTAL && NF_CONNTRACK 80 depends on NF_CONNTRACK
81 depends on NETFILTER_ADVANCED
83 help 82 help
84 If this option is enabled, the connection tracking code will 83 If this option is enabled, the connection tracking code will
85 provide a notifier chain that can be used by other kernel code 84 provide a notifier chain that can be used by other kernel code
@@ -94,7 +93,7 @@ config NF_CT_PROTO_GRE
94config NF_CT_PROTO_SCTP 93config NF_CT_PROTO_SCTP
95 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 94 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
96 depends on EXPERIMENTAL && NF_CONNTRACK 95 depends on EXPERIMENTAL && NF_CONNTRACK
97 default n 96 depends on NETFILTER_ADVANCED
98 help 97 help
99 With this option enabled, the layer 3 independent connection 98 With this option enabled, the layer 3 independent connection
100 tracking code will be able to do state tracking on SCTP connections. 99 tracking code will be able to do state tracking on SCTP connections.
@@ -103,8 +102,9 @@ config NF_CT_PROTO_SCTP
103 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 102 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
104 103
105config NF_CT_PROTO_UDPLITE 104config NF_CT_PROTO_UDPLITE
106 tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)' 105 tristate 'UDP-Lite protocol connection tracking support'
107 depends on EXPERIMENTAL && NF_CONNTRACK 106 depends on NF_CONNTRACK
107 depends on NETFILTER_ADVANCED
108 help 108 help
109 With this option enabled, the layer 3 independent connection 109 With this option enabled, the layer 3 independent connection
110 tracking code will be able to do state tracking on UDP-Lite 110 tracking code will be able to do state tracking on UDP-Lite
@@ -115,6 +115,7 @@ config NF_CT_PROTO_UDPLITE
115config NF_CONNTRACK_AMANDA 115config NF_CONNTRACK_AMANDA
116 tristate "Amanda backup protocol support" 116 tristate "Amanda backup protocol support"
117 depends on NF_CONNTRACK 117 depends on NF_CONNTRACK
118 depends on NETFILTER_ADVANCED
118 select TEXTSEARCH 119 select TEXTSEARCH
119 select TEXTSEARCH_KMP 120 select TEXTSEARCH_KMP
120 help 121 help
@@ -130,6 +131,7 @@ config NF_CONNTRACK_AMANDA
130config NF_CONNTRACK_FTP 131config NF_CONNTRACK_FTP
131 tristate "FTP protocol support" 132 tristate "FTP protocol support"
132 depends on NF_CONNTRACK 133 depends on NF_CONNTRACK
134 default m if NETFILTER_ADVANCED=n
133 help 135 help
134 Tracking FTP connections is problematic: special helpers are 136 Tracking FTP connections is problematic: special helpers are
135 required for tracking them, and doing masquerading and other forms 137 required for tracking them, and doing masquerading and other forms
@@ -142,8 +144,9 @@ config NF_CONNTRACK_FTP
142 To compile it as a module, choose M here. If unsure, say N. 144 To compile it as a module, choose M here. If unsure, say N.
143 145
144config NF_CONNTRACK_H323 146config NF_CONNTRACK_H323
145 tristate "H.323 protocol support (EXPERIMENTAL)" 147 tristate "H.323 protocol support"
146 depends on EXPERIMENTAL && NF_CONNTRACK && (IPV6 || IPV6=n) 148 depends on NF_CONNTRACK && (IPV6 || IPV6=n)
149 depends on NETFILTER_ADVANCED
147 help 150 help
148 H.323 is a VoIP signalling protocol from ITU-T. As one of the most 151 H.323 is a VoIP signalling protocol from ITU-T. As one of the most
149 important VoIP protocols, it is widely used by voice hardware and 152 important VoIP protocols, it is widely used by voice hardware and
@@ -163,6 +166,7 @@ config NF_CONNTRACK_H323
163config NF_CONNTRACK_IRC 166config NF_CONNTRACK_IRC
164 tristate "IRC protocol support" 167 tristate "IRC protocol support"
165 depends on NF_CONNTRACK 168 depends on NF_CONNTRACK
169 default m if NETFILTER_ADVANCED=n
166 help 170 help
167 There is a commonly-used extension to IRC called 171 There is a commonly-used extension to IRC called
168 Direct Client-to-Client Protocol (DCC). This enables users to send 172 Direct Client-to-Client Protocol (DCC). This enables users to send
@@ -176,8 +180,9 @@ config NF_CONNTRACK_IRC
176 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
177 181
178config NF_CONNTRACK_NETBIOS_NS 182config NF_CONNTRACK_NETBIOS_NS
179 tristate "NetBIOS name service protocol support (EXPERIMENTAL)" 183 tristate "NetBIOS name service protocol support"
180 depends on EXPERIMENTAL && NF_CONNTRACK 184 depends on NF_CONNTRACK
185 depends on NETFILTER_ADVANCED
181 help 186 help
182 NetBIOS name service requests are sent as broadcast messages from an 187 NetBIOS name service requests are sent as broadcast messages from an
183 unprivileged port and responded to with unicast messages to the 188 unprivileged port and responded to with unicast messages to the
@@ -197,6 +202,7 @@ config NF_CONNTRACK_NETBIOS_NS
197config NF_CONNTRACK_PPTP 202config NF_CONNTRACK_PPTP
198 tristate "PPtP protocol support" 203 tristate "PPtP protocol support"
199 depends on NF_CONNTRACK 204 depends on NF_CONNTRACK
205 depends on NETFILTER_ADVANCED
200 select NF_CT_PROTO_GRE 206 select NF_CT_PROTO_GRE
201 help 207 help
202 This module adds support for PPTP (Point to Point Tunnelling 208 This module adds support for PPTP (Point to Point Tunnelling
@@ -216,6 +222,7 @@ config NF_CONNTRACK_PPTP
216config NF_CONNTRACK_SANE 222config NF_CONNTRACK_SANE
217 tristate "SANE protocol support (EXPERIMENTAL)" 223 tristate "SANE protocol support (EXPERIMENTAL)"
218 depends on EXPERIMENTAL && NF_CONNTRACK 224 depends on EXPERIMENTAL && NF_CONNTRACK
225 depends on NETFILTER_ADVANCED
219 help 226 help
220 SANE is a protocol for remote access to scanners as implemented 227 SANE is a protocol for remote access to scanners as implemented
221 by the 'saned' daemon. Like FTP, it uses separate control and 228 by the 'saned' daemon. Like FTP, it uses separate control and
@@ -227,8 +234,9 @@ config NF_CONNTRACK_SANE
227 To compile it as a module, choose M here. If unsure, say N. 234 To compile it as a module, choose M here. If unsure, say N.
228 235
229config NF_CONNTRACK_SIP 236config NF_CONNTRACK_SIP
230 tristate "SIP protocol support (EXPERIMENTAL)" 237 tristate "SIP protocol support"
231 depends on EXPERIMENTAL && NF_CONNTRACK 238 depends on NF_CONNTRACK
239 default m if NETFILTER_ADVANCED=n
232 help 240 help
233 SIP is an application-layer control protocol that can establish, 241 SIP is an application-layer control protocol that can establish,
234 modify, and terminate multimedia sessions (conferences) such as 242 modify, and terminate multimedia sessions (conferences) such as
@@ -241,6 +249,7 @@ config NF_CONNTRACK_SIP
241config NF_CONNTRACK_TFTP 249config NF_CONNTRACK_TFTP
242 tristate "TFTP protocol support" 250 tristate "TFTP protocol support"
243 depends on NF_CONNTRACK 251 depends on NF_CONNTRACK
252 depends on NETFILTER_ADVANCED
244 help 253 help
245 TFTP connection tracking helper, this is required depending 254 TFTP connection tracking helper, this is required depending
246 on how restrictive your ruleset is. 255 on how restrictive your ruleset is.
@@ -250,15 +259,17 @@ config NF_CONNTRACK_TFTP
250 To compile it as a module, choose M here. If unsure, say N. 259 To compile it as a module, choose M here. If unsure, say N.
251 260
252config NF_CT_NETLINK 261config NF_CT_NETLINK
253 tristate 'Connection tracking netlink interface (EXPERIMENTAL)' 262 tristate 'Connection tracking netlink interface'
254 depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK 263 depends on NF_CONNTRACK
255 depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m 264 select NETFILTER_NETLINK
256 depends on NF_NAT=n || NF_NAT 265 depends on NF_NAT=n || NF_NAT
266 default m if NETFILTER_ADVANCED=n
257 help 267 help
258 This option enables support for a netlink-based userspace interface 268 This option enables support for a netlink-based userspace interface
259 269
260config NETFILTER_XTABLES 270config NETFILTER_XTABLES
261 tristate "Netfilter Xtables support (required for ip_tables)" 271 tristate "Netfilter Xtables support (required for ip_tables)"
272 default m if NETFILTER_ADVANCED=n
262 help 273 help
263 This is required if you intend to use any of ip_tables, 274 This is required if you intend to use any of ip_tables,
264 ip6_tables or arp_tables. 275 ip6_tables or arp_tables.
@@ -268,6 +279,7 @@ config NETFILTER_XTABLES
268config NETFILTER_XT_TARGET_CLASSIFY 279config NETFILTER_XT_TARGET_CLASSIFY
269 tristate '"CLASSIFY" target support' 280 tristate '"CLASSIFY" target support'
270 depends on NETFILTER_XTABLES 281 depends on NETFILTER_XTABLES
282 depends on NETFILTER_ADVANCED
271 help 283 help
272 This option adds a `CLASSIFY' target, which enables the user to set 284 This option adds a `CLASSIFY' target, which enables the user to set
273 the priority of a packet. Some qdiscs can use this value for 285 the priority of a packet. Some qdiscs can use this value for
@@ -282,31 +294,38 @@ config NETFILTER_XT_TARGET_CONNMARK
282 depends on NETFILTER_XTABLES 294 depends on NETFILTER_XTABLES
283 depends on IP_NF_MANGLE || IP6_NF_MANGLE 295 depends on IP_NF_MANGLE || IP6_NF_MANGLE
284 depends on NF_CONNTRACK 296 depends on NF_CONNTRACK
297 depends on NETFILTER_ADVANCED
285 select NF_CONNTRACK_MARK 298 select NF_CONNTRACK_MARK
286 help 299 help
287 This option adds a `CONNMARK' target, which allows one to manipulate 300 This option adds a `CONNMARK' target, which allows one to manipulate
288 the connection mark value. Similar to the MARK target, but 301 the connection mark value. Similar to the MARK target, but
289 affects the connection mark value rather than the packet mark value. 302 affects the connection mark value rather than the packet mark value.
290 303
291 If you want to compile it as a module, say M here and read 304 If you want to compile it as a module, say M here and read
292 <file:Documentation/kbuild/modules.txt>. The module will be called 305 <file:Documentation/kbuild/modules.txt>. The module will be called
293 ipt_CONNMARK.ko. If unsure, say `N'. 306 ipt_CONNMARK.ko. If unsure, say `N'.
294 307
295config NETFILTER_XT_TARGET_DSCP 308config NETFILTER_XT_TARGET_DSCP
296 tristate '"DSCP" target support' 309 tristate '"DSCP" and "TOS" target support'
297 depends on NETFILTER_XTABLES 310 depends on NETFILTER_XTABLES
298 depends on IP_NF_MANGLE || IP6_NF_MANGLE 311 depends on IP_NF_MANGLE || IP6_NF_MANGLE
312 depends on NETFILTER_ADVANCED
299 help 313 help
300 This option adds a `DSCP' target, which allows you to manipulate 314 This option adds a `DSCP' target, which allows you to manipulate
301 the IPv4/IPv6 header DSCP field (differentiated services codepoint). 315 the IPv4/IPv6 header DSCP field (differentiated services codepoint).
302 316
303 The DSCP field can have any value between 0x0 and 0x3f inclusive. 317 The DSCP field can have any value between 0x0 and 0x3f inclusive.
304 318
319 It also adds the "TOS" target, which allows you to create rules in
320 the "mangle" table which alter the Type Of Service field of an IPv4
321 or the Priority field of an IPv6 packet, prior to routing.
322
305 To compile it as a module, choose M here. If unsure, say N. 323 To compile it as a module, choose M here. If unsure, say N.
306 324
307config NETFILTER_XT_TARGET_MARK 325config NETFILTER_XT_TARGET_MARK
308 tristate '"MARK" target support' 326 tristate '"MARK" target support'
309 depends on NETFILTER_XTABLES 327 depends on NETFILTER_XTABLES
328 default m if NETFILTER_ADVANCED=n
310 help 329 help
311 This option adds a `MARK' target, which allows you to create rules 330 This option adds a `MARK' target, which allows you to create rules
312 in the `mangle' table which alter the netfilter mark (nfmark) field 331 in the `mangle' table which alter the netfilter mark (nfmark) field
@@ -320,6 +339,7 @@ config NETFILTER_XT_TARGET_MARK
320config NETFILTER_XT_TARGET_NFQUEUE 339config NETFILTER_XT_TARGET_NFQUEUE
321 tristate '"NFQUEUE" target Support' 340 tristate '"NFQUEUE" target Support'
322 depends on NETFILTER_XTABLES 341 depends on NETFILTER_XTABLES
342 depends on NETFILTER_ADVANCED
323 help 343 help
324 This target replaced the old obsolete QUEUE target. 344 This target replaced the old obsolete QUEUE target.
325 345
@@ -331,6 +351,7 @@ config NETFILTER_XT_TARGET_NFQUEUE
331config NETFILTER_XT_TARGET_NFLOG 351config NETFILTER_XT_TARGET_NFLOG
332 tristate '"NFLOG" target support' 352 tristate '"NFLOG" target support'
333 depends on NETFILTER_XTABLES 353 depends on NETFILTER_XTABLES
354 default m if NETFILTER_ADVANCED=n
334 help 355 help
335 This option enables the NFLOG target, which allows to LOG 356 This option enables the NFLOG target, which allows to LOG
336 messages through the netfilter logging API, which can use 357 messages through the netfilter logging API, which can use
@@ -344,30 +365,44 @@ config NETFILTER_XT_TARGET_NOTRACK
344 depends on NETFILTER_XTABLES 365 depends on NETFILTER_XTABLES
345 depends on IP_NF_RAW || IP6_NF_RAW 366 depends on IP_NF_RAW || IP6_NF_RAW
346 depends on NF_CONNTRACK 367 depends on NF_CONNTRACK
368 depends on NETFILTER_ADVANCED
347 help 369 help
348 The NOTRACK target allows a select rule to specify 370 The NOTRACK target allows a select rule to specify
349 which packets *not* to enter the conntrack/NAT 371 which packets *not* to enter the conntrack/NAT
350 subsystem with all the consequences (no ICMP error tracking, 372 subsystem with all the consequences (no ICMP error tracking,
351 no protocol helpers for the selected packets). 373 no protocol helpers for the selected packets).
352 374
353 If you want to compile it as a module, say M here and read 375 If you want to compile it as a module, say M here and read
354 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 376 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
355 377
378config NETFILTER_XT_TARGET_RATEEST
379 tristate '"RATEEST" target support'
380 depends on NETFILTER_XTABLES
381 depends on NETFILTER_ADVANCED
382 help
383 This option adds a `RATEEST' target, which allows to measure
384 rates similar to TC estimators. The `rateest' match can be
385 used to match on the measured rates.
386
387 To compile it as a module, choose M here. If unsure, say N.
388
356config NETFILTER_XT_TARGET_TRACE 389config NETFILTER_XT_TARGET_TRACE
357 tristate '"TRACE" target support' 390 tristate '"TRACE" target support'
358 depends on NETFILTER_XTABLES 391 depends on NETFILTER_XTABLES
359 depends on IP_NF_RAW || IP6_NF_RAW 392 depends on IP_NF_RAW || IP6_NF_RAW
393 depends on NETFILTER_ADVANCED
360 help 394 help
361 The TRACE target allows you to mark packets so that the kernel 395 The TRACE target allows you to mark packets so that the kernel
362 will log every rule which match the packets as those traverse 396 will log every rule which match the packets as those traverse
363 the tables, chains, rules. 397 the tables, chains, rules.
364 398
365 If you want to compile it as a module, say M here and read 399 If you want to compile it as a module, say M here and read
366 <file:Documentation/modules.txt>. If unsure, say `N'. 400 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
367 401
368config NETFILTER_XT_TARGET_SECMARK 402config NETFILTER_XT_TARGET_SECMARK
369 tristate '"SECMARK" target support' 403 tristate '"SECMARK" target support'
370 depends on NETFILTER_XTABLES && NETWORK_SECMARK 404 depends on NETFILTER_XTABLES && NETWORK_SECMARK
405 default m if NETFILTER_ADVANCED=n
371 help 406 help
372 The SECMARK target allows security marking of network 407 The SECMARK target allows security marking of network
373 packets, for use with security subsystems. 408 packets, for use with security subsystems.
@@ -377,6 +412,7 @@ config NETFILTER_XT_TARGET_SECMARK
377config NETFILTER_XT_TARGET_CONNSECMARK 412config NETFILTER_XT_TARGET_CONNSECMARK
378 tristate '"CONNSECMARK" target support' 413 tristate '"CONNSECMARK" target support'
379 depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK 414 depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
415 default m if NETFILTER_ADVANCED=n
380 help 416 help
381 The CONNSECMARK target copies security markings from packets 417 The CONNSECMARK target copies security markings from packets
382 to connections, and restores security markings from connections 418 to connections, and restores security markings from connections
@@ -388,6 +424,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
388config NETFILTER_XT_TARGET_TCPMSS 424config NETFILTER_XT_TARGET_TCPMSS
389 tristate '"TCPMSS" target support' 425 tristate '"TCPMSS" target support'
390 depends on NETFILTER_XTABLES && (IPV6 || IPV6=n) 426 depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
427 default m if NETFILTER_ADVANCED=n
391 ---help--- 428 ---help---
392 This option adds a `TCPMSS' target, which allows you to alter the 429 This option adds a `TCPMSS' target, which allows you to alter the
393 MSS value of TCP SYN packets, to control the maximum size for that 430 MSS value of TCP SYN packets, to control the maximum size for that
@@ -411,9 +448,19 @@ config NETFILTER_XT_TARGET_TCPMSS
411 448
412 To compile it as a module, choose M here. If unsure, say N. 449 To compile it as a module, choose M here. If unsure, say N.
413 450
451config NETFILTER_XT_TARGET_TCPOPTSTRIP
452 tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
453 depends on EXPERIMENTAL && NETFILTER_XTABLES
454 depends on IP_NF_MANGLE || IP6_NF_MANGLE
455 depends on NETFILTER_ADVANCED
456 help
457 This option adds a "TCPOPTSTRIP" target, which allows you to strip
458 TCP options from TCP packets.
459
414config NETFILTER_XT_MATCH_COMMENT 460config NETFILTER_XT_MATCH_COMMENT
415 tristate '"comment" match support' 461 tristate '"comment" match support'
416 depends on NETFILTER_XTABLES 462 depends on NETFILTER_XTABLES
463 depends on NETFILTER_ADVANCED
417 help 464 help
418 This option adds a `comment' dummy-match, which allows you to put 465 This option adds a `comment' dummy-match, which allows you to put
419 comments in your iptables ruleset. 466 comments in your iptables ruleset.
@@ -425,6 +472,7 @@ config NETFILTER_XT_MATCH_CONNBYTES
425 tristate '"connbytes" per-connection counter match support' 472 tristate '"connbytes" per-connection counter match support'
426 depends on NETFILTER_XTABLES 473 depends on NETFILTER_XTABLES
427 depends on NF_CONNTRACK 474 depends on NF_CONNTRACK
475 depends on NETFILTER_ADVANCED
428 select NF_CT_ACCT 476 select NF_CT_ACCT
429 help 477 help
430 This option adds a `connbytes' match, which allows you to match the 478 This option adds a `connbytes' match, which allows you to match the
@@ -437,6 +485,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
437 tristate '"connlimit" match support"' 485 tristate '"connlimit" match support"'
438 depends on NETFILTER_XTABLES 486 depends on NETFILTER_XTABLES
439 depends on NF_CONNTRACK 487 depends on NF_CONNTRACK
488 depends on NETFILTER_ADVANCED
440 ---help--- 489 ---help---
441 This match allows you to match against the number of parallel 490 This match allows you to match against the number of parallel
442 connections to a server per client IP address (or address block). 491 connections to a server per client IP address (or address block).
@@ -445,11 +494,12 @@ config NETFILTER_XT_MATCH_CONNMARK
445 tristate '"connmark" connection mark match support' 494 tristate '"connmark" connection mark match support'
446 depends on NETFILTER_XTABLES 495 depends on NETFILTER_XTABLES
447 depends on NF_CONNTRACK 496 depends on NF_CONNTRACK
497 depends on NETFILTER_ADVANCED
448 select NF_CONNTRACK_MARK 498 select NF_CONNTRACK_MARK
449 help 499 help
450 This option adds a `connmark' match, which allows you to match the 500 This option adds a `connmark' match, which allows you to match the
451 connection mark value previously set for the session by `CONNMARK'. 501 connection mark value previously set for the session by `CONNMARK'.
452 502
453 If you want to compile it as a module, say M here and read 503 If you want to compile it as a module, say M here and read
454 <file:Documentation/kbuild/modules.txt>. The module will be called 504 <file:Documentation/kbuild/modules.txt>. The module will be called
455 ipt_connmark.ko. If unsure, say `N'. 505 ipt_connmark.ko. If unsure, say `N'.
@@ -458,6 +508,7 @@ config NETFILTER_XT_MATCH_CONNTRACK
458 tristate '"conntrack" connection tracking match support' 508 tristate '"conntrack" connection tracking match support'
459 depends on NETFILTER_XTABLES 509 depends on NETFILTER_XTABLES
460 depends on NF_CONNTRACK 510 depends on NF_CONNTRACK
511 default m if NETFILTER_ADVANCED=n
461 help 512 help
462 This is a general conntrack match module, a superset of the state match. 513 This is a general conntrack match module, a superset of the state match.
463 514
@@ -468,8 +519,9 @@ config NETFILTER_XT_MATCH_CONNTRACK
468 To compile it as a module, choose M here. If unsure, say N. 519 To compile it as a module, choose M here. If unsure, say N.
469 520
470config NETFILTER_XT_MATCH_DCCP 521config NETFILTER_XT_MATCH_DCCP
471 tristate '"DCCP" protocol match support' 522 tristate '"dccp" protocol match support'
472 depends on NETFILTER_XTABLES 523 depends on NETFILTER_XTABLES
524 depends on NETFILTER_ADVANCED
473 help 525 help
474 With this option enabled, you will be able to use the iptables 526 With this option enabled, you will be able to use the iptables
475 `dccp' match in order to match on DCCP source/destination ports 527 `dccp' match in order to match on DCCP source/destination ports
@@ -479,19 +531,25 @@ config NETFILTER_XT_MATCH_DCCP
479 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 531 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
480 532
481config NETFILTER_XT_MATCH_DSCP 533config NETFILTER_XT_MATCH_DSCP
482 tristate '"DSCP" match support' 534 tristate '"dscp" and "tos" match support'
483 depends on NETFILTER_XTABLES 535 depends on NETFILTER_XTABLES
536 depends on NETFILTER_ADVANCED
484 help 537 help
485 This option adds a `DSCP' match, which allows you to match against 538 This option adds a `DSCP' match, which allows you to match against
486 the IPv4/IPv6 header DSCP field (differentiated services codepoint). 539 the IPv4/IPv6 header DSCP field (differentiated services codepoint).
487 540
488 The DSCP field can have any value between 0x0 and 0x3f inclusive. 541 The DSCP field can have any value between 0x0 and 0x3f inclusive.
489 542
543 It will also add a "tos" match, which allows you to match packets
544 based on the Type Of Service fields of the IPv4 packet (which share
545 the same bits as DSCP).
546
490 To compile it as a module, choose M here. If unsure, say N. 547 To compile it as a module, choose M here. If unsure, say N.
491 548
492config NETFILTER_XT_MATCH_ESP 549config NETFILTER_XT_MATCH_ESP
493 tristate '"ESP" match support' 550 tristate '"esp" match support'
494 depends on NETFILTER_XTABLES 551 depends on NETFILTER_XTABLES
552 depends on NETFILTER_ADVANCED
495 help 553 help
496 This match extension allows you to match a range of SPIs 554 This match extension allows you to match a range of SPIs
497 inside ESP header of IPSec packets. 555 inside ESP header of IPSec packets.
@@ -502,15 +560,28 @@ config NETFILTER_XT_MATCH_HELPER
502 tristate '"helper" match support' 560 tristate '"helper" match support'
503 depends on NETFILTER_XTABLES 561 depends on NETFILTER_XTABLES
504 depends on NF_CONNTRACK 562 depends on NF_CONNTRACK
563 depends on NETFILTER_ADVANCED
505 help 564 help
506 Helper matching allows you to match packets in dynamic connections 565 Helper matching allows you to match packets in dynamic connections
507 tracked by a conntrack-helper, ie. ip_conntrack_ftp 566 tracked by a conntrack-helper, ie. ip_conntrack_ftp
508 567
509 To compile it as a module, choose M here. If unsure, say Y. 568 To compile it as a module, choose M here. If unsure, say Y.
510 569
570config NETFILTER_XT_MATCH_IPRANGE
571 tristate '"iprange" address range match support'
572 depends on NETFILTER_XTABLES
573 depends on NETFILTER_ADVANCED
574 ---help---
575 This option adds a "iprange" match, which allows you to match based on
576 an IP address range. (Normal iptables only matches on single addresses
577 with an optional mask.)
578
579 If unsure, say M.
580
511config NETFILTER_XT_MATCH_LENGTH 581config NETFILTER_XT_MATCH_LENGTH
512 tristate '"length" match support' 582 tristate '"length" match support'
513 depends on NETFILTER_XTABLES 583 depends on NETFILTER_XTABLES
584 depends on NETFILTER_ADVANCED
514 help 585 help
515 This option allows you to match the length of a packet against a 586 This option allows you to match the length of a packet against a
516 specific value or range of values. 587 specific value or range of values.
@@ -520,6 +591,7 @@ config NETFILTER_XT_MATCH_LENGTH
520config NETFILTER_XT_MATCH_LIMIT 591config NETFILTER_XT_MATCH_LIMIT
521 tristate '"limit" match support' 592 tristate '"limit" match support'
522 depends on NETFILTER_XTABLES 593 depends on NETFILTER_XTABLES
594 depends on NETFILTER_ADVANCED
523 help 595 help
524 limit matching allows you to control the rate at which a rule can be 596 limit matching allows you to control the rate at which a rule can be
525 matched: mainly useful in combination with the LOG target ("LOG 597 matched: mainly useful in combination with the LOG target ("LOG
@@ -530,6 +602,7 @@ config NETFILTER_XT_MATCH_LIMIT
530config NETFILTER_XT_MATCH_MAC 602config NETFILTER_XT_MATCH_MAC
531 tristate '"mac" address match support' 603 tristate '"mac" address match support'
532 depends on NETFILTER_XTABLES 604 depends on NETFILTER_XTABLES
605 depends on NETFILTER_ADVANCED
533 help 606 help
534 MAC matching allows you to match packets based on the source 607 MAC matching allows you to match packets based on the source
535 Ethernet address of the packet. 608 Ethernet address of the packet.
@@ -539,6 +612,7 @@ config NETFILTER_XT_MATCH_MAC
539config NETFILTER_XT_MATCH_MARK 612config NETFILTER_XT_MATCH_MARK
540 tristate '"mark" match support' 613 tristate '"mark" match support'
541 depends on NETFILTER_XTABLES 614 depends on NETFILTER_XTABLES
615 default m if NETFILTER_ADVANCED=n
542 help 616 help
543 Netfilter mark matching allows you to match packets based on the 617 Netfilter mark matching allows you to match packets based on the
544 `nfmark' value in the packet. This can be set by the MARK target 618 `nfmark' value in the packet. This can be set by the MARK target
@@ -546,9 +620,19 @@ config NETFILTER_XT_MATCH_MARK
546 620
547 To compile it as a module, choose M here. If unsure, say N. 621 To compile it as a module, choose M here. If unsure, say N.
548 622
623config NETFILTER_XT_MATCH_OWNER
624 tristate '"owner" match support'
625 depends on NETFILTER_XTABLES
626 depends on NETFILTER_ADVANCED
627 ---help---
628 Socket owner matching allows you to match locally-generated packets
629 based on who created the socket: the user or group. It is also
630 possible to check whether a socket actually exists.
631
549config NETFILTER_XT_MATCH_POLICY 632config NETFILTER_XT_MATCH_POLICY
550 tristate 'IPsec "policy" match support' 633 tristate 'IPsec "policy" match support'
551 depends on NETFILTER_XTABLES && XFRM 634 depends on NETFILTER_XTABLES && XFRM
635 default m if NETFILTER_ADVANCED=n
552 help 636 help
553 Policy matching allows you to match packets based on the 637 Policy matching allows you to match packets based on the
554 IPsec policy that was used during decapsulation/will 638 IPsec policy that was used during decapsulation/will
@@ -557,8 +641,9 @@ config NETFILTER_XT_MATCH_POLICY
557 To compile it as a module, choose M here. If unsure, say N. 641 To compile it as a module, choose M here. If unsure, say N.
558 642
559config NETFILTER_XT_MATCH_MULTIPORT 643config NETFILTER_XT_MATCH_MULTIPORT
560 tristate "Multiple port match support" 644 tristate '"multiport" Multiple port match support'
561 depends on NETFILTER_XTABLES 645 depends on NETFILTER_XTABLES
646 depends on NETFILTER_ADVANCED
562 help 647 help
563 Multiport matching allows you to match TCP or UDP packets based on 648 Multiport matching allows you to match TCP or UDP packets based on
564 a series of source or destination ports: normally a rule can only 649 a series of source or destination ports: normally a rule can only
@@ -569,6 +654,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
569config NETFILTER_XT_MATCH_PHYSDEV 654config NETFILTER_XT_MATCH_PHYSDEV
570 tristate '"physdev" match support' 655 tristate '"physdev" match support'
571 depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER 656 depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
657 depends on NETFILTER_ADVANCED
572 help 658 help
573 Physdev packet matching matches against the physical bridge ports 659 Physdev packet matching matches against the physical bridge ports
574 the IP packet arrived on or will leave by. 660 the IP packet arrived on or will leave by.
@@ -578,6 +664,7 @@ config NETFILTER_XT_MATCH_PHYSDEV
578config NETFILTER_XT_MATCH_PKTTYPE 664config NETFILTER_XT_MATCH_PKTTYPE
579 tristate '"pkttype" packet type match support' 665 tristate '"pkttype" packet type match support'
580 depends on NETFILTER_XTABLES 666 depends on NETFILTER_XTABLES
667 depends on NETFILTER_ADVANCED
581 help 668 help
582 Packet type matching allows you to match a packet by 669 Packet type matching allows you to match a packet by
583 its "class", eg. BROADCAST, MULTICAST, ... 670 its "class", eg. BROADCAST, MULTICAST, ...
@@ -590,6 +677,7 @@ config NETFILTER_XT_MATCH_PKTTYPE
590config NETFILTER_XT_MATCH_QUOTA 677config NETFILTER_XT_MATCH_QUOTA
591 tristate '"quota" match support' 678 tristate '"quota" match support'
592 depends on NETFILTER_XTABLES 679 depends on NETFILTER_XTABLES
680 depends on NETFILTER_ADVANCED
593 help 681 help
594 This option adds a `quota' match, which allows to match on a 682 This option adds a `quota' match, which allows to match on a
595 byte counter. 683 byte counter.
@@ -597,23 +685,36 @@ config NETFILTER_XT_MATCH_QUOTA
597 If you want to compile it as a module, say M here and read 685 If you want to compile it as a module, say M here and read
598 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 686 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
599 687
688config NETFILTER_XT_MATCH_RATEEST
689 tristate '"rateest" match support'
690 depends on NETFILTER_XTABLES
691 depends on NETFILTER_ADVANCED
692 select NETFILTER_XT_TARGET_RATEEST
693 help
694 This option adds a `rateest' match, which allows to match on the
695 rate estimated by the RATEEST target.
696
697 To compile it as a module, choose M here. If unsure, say N.
698
600config NETFILTER_XT_MATCH_REALM 699config NETFILTER_XT_MATCH_REALM
601 tristate '"realm" match support' 700 tristate '"realm" match support'
602 depends on NETFILTER_XTABLES 701 depends on NETFILTER_XTABLES
702 depends on NETFILTER_ADVANCED
603 select NET_CLS_ROUTE 703 select NET_CLS_ROUTE
604 help 704 help
605 This option adds a `realm' match, which allows you to use the realm 705 This option adds a `realm' match, which allows you to use the realm
606 key from the routing subsystem inside iptables. 706 key from the routing subsystem inside iptables.
607 707
608 This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 708 This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
609 in tc world. 709 in tc world.
610 710
611 If you want to compile it as a module, say M here and read 711 If you want to compile it as a module, say M here and read
612 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 712 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
613 713
614config NETFILTER_XT_MATCH_SCTP 714config NETFILTER_XT_MATCH_SCTP
615 tristate '"sctp" protocol match support (EXPERIMENTAL)' 715 tristate '"sctp" protocol match support (EXPERIMENTAL)'
616 depends on NETFILTER_XTABLES && EXPERIMENTAL 716 depends on NETFILTER_XTABLES && EXPERIMENTAL
717 depends on NETFILTER_ADVANCED
617 help 718 help
618 With this option enabled, you will be able to use the 719 With this option enabled, you will be able to use the
619 `sctp' match in order to match on SCTP source/destination ports 720 `sctp' match in order to match on SCTP source/destination ports
@@ -626,6 +727,7 @@ config NETFILTER_XT_MATCH_STATE
626 tristate '"state" match support' 727 tristate '"state" match support'
627 depends on NETFILTER_XTABLES 728 depends on NETFILTER_XTABLES
628 depends on NF_CONNTRACK 729 depends on NF_CONNTRACK
730 default m if NETFILTER_ADVANCED=n
629 help 731 help
630 Connection state matching allows you to match packets based on their 732 Connection state matching allows you to match packets based on their
631 relationship to a tracked connection (ie. previous packets). This 733 relationship to a tracked connection (ie. previous packets). This
@@ -636,6 +738,7 @@ config NETFILTER_XT_MATCH_STATE
636config NETFILTER_XT_MATCH_STATISTIC 738config NETFILTER_XT_MATCH_STATISTIC
637 tristate '"statistic" match support' 739 tristate '"statistic" match support'
638 depends on NETFILTER_XTABLES 740 depends on NETFILTER_XTABLES
741 depends on NETFILTER_ADVANCED
639 help 742 help
640 This option adds a `statistic' match, which allows you to match 743 This option adds a `statistic' match, which allows you to match
641 on packets periodically or randomly with a given percentage. 744 on packets periodically or randomly with a given percentage.
@@ -645,6 +748,7 @@ config NETFILTER_XT_MATCH_STATISTIC
645config NETFILTER_XT_MATCH_STRING 748config NETFILTER_XT_MATCH_STRING
646 tristate '"string" match support' 749 tristate '"string" match support'
647 depends on NETFILTER_XTABLES 750 depends on NETFILTER_XTABLES
751 depends on NETFILTER_ADVANCED
648 select TEXTSEARCH 752 select TEXTSEARCH
649 select TEXTSEARCH_KMP 753 select TEXTSEARCH_KMP
650 select TEXTSEARCH_BM 754 select TEXTSEARCH_BM
@@ -658,6 +762,7 @@ config NETFILTER_XT_MATCH_STRING
658config NETFILTER_XT_MATCH_TCPMSS 762config NETFILTER_XT_MATCH_TCPMSS
659 tristate '"tcpmss" match support' 763 tristate '"tcpmss" match support'
660 depends on NETFILTER_XTABLES 764 depends on NETFILTER_XTABLES
765 depends on NETFILTER_ADVANCED
661 help 766 help
662 This option adds a `tcpmss' match, which allows you to examine the 767 This option adds a `tcpmss' match, which allows you to examine the
663 MSS value of TCP SYN packets, which control the maximum packet size 768 MSS value of TCP SYN packets, which control the maximum packet size
@@ -668,6 +773,7 @@ config NETFILTER_XT_MATCH_TCPMSS
668config NETFILTER_XT_MATCH_TIME 773config NETFILTER_XT_MATCH_TIME
669 tristate '"time" match support' 774 tristate '"time" match support'
670 depends on NETFILTER_XTABLES 775 depends on NETFILTER_XTABLES
776 depends on NETFILTER_ADVANCED
671 ---help--- 777 ---help---
672 This option adds a "time" match, which allows you to match based on 778 This option adds a "time" match, which allows you to match based on
673 the packet arrival time (at the machine which netfilter is running) 779 the packet arrival time (at the machine which netfilter is running)
@@ -682,6 +788,7 @@ config NETFILTER_XT_MATCH_TIME
682config NETFILTER_XT_MATCH_U32 788config NETFILTER_XT_MATCH_U32
683 tristate '"u32" match support' 789 tristate '"u32" match support'
684 depends on NETFILTER_XTABLES 790 depends on NETFILTER_XTABLES
791 depends on NETFILTER_ADVANCED
685 ---help--- 792 ---help---
686 u32 allows you to extract quantities of up to 4 bytes from a packet, 793 u32 allows you to extract quantities of up to 4 bytes from a packet,
687 AND them with specified masks, shift them by specified amounts and 794 AND them with specified masks, shift them by specified amounts and
@@ -695,6 +802,7 @@ config NETFILTER_XT_MATCH_U32
695config NETFILTER_XT_MATCH_HASHLIMIT 802config NETFILTER_XT_MATCH_HASHLIMIT
696 tristate '"hashlimit" match support' 803 tristate '"hashlimit" match support'
697 depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) 804 depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
805 depends on NETFILTER_ADVANCED
698 help 806 help
699 This option adds a `hashlimit' match. 807 This option adds a `hashlimit' match.
700 808
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 93c58f973831..ea7508387f95 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -4,7 +4,6 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp
4nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 4nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
5 5
6obj-$(CONFIG_NETFILTER) = netfilter.o 6obj-$(CONFIG_NETFILTER) = netfilter.o
7obj-$(CONFIG_SYSCTL) += nf_sysctl.o
8 7
9obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o 8obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
10obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o 9obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
@@ -40,15 +39,17 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
40# targets 39# targets
41obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 40obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
42obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o 41obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
42obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
43obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o 43obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
44obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o 44obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
45obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
46obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o 45obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
46obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
47obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o 47obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o 48obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o 49obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o 50obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
51obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 51obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
52obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
52 53
53# matches 54# matches
54obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o 55obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
@@ -59,22 +60,25 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
59obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 60obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
60obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o 61obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
61obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 62obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
63obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
62obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o 64obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
65obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
63obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o 66obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
64obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o 67obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
65obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o 68obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
66obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o 69obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
67obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o 70obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
68obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o 71obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
69obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o 73obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
70obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
76obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
71obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o 77obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o 78obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o 79obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o 80obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
75obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o 81obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
76obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
77obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o 82obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
78obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o 83obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
79obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o 84obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
80obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index bed9ba01e8ec..c4065b8f9a95 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -26,10 +26,10 @@
26 26
27static DEFINE_MUTEX(afinfo_mutex); 27static DEFINE_MUTEX(afinfo_mutex);
28 28
29struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly; 29const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
30EXPORT_SYMBOL(nf_afinfo); 30EXPORT_SYMBOL(nf_afinfo);
31 31
32int nf_register_afinfo(struct nf_afinfo *afinfo) 32int nf_register_afinfo(const struct nf_afinfo *afinfo)
33{ 33{
34 int err; 34 int err;
35 35
@@ -42,7 +42,7 @@ int nf_register_afinfo(struct nf_afinfo *afinfo)
42} 42}
43EXPORT_SYMBOL_GPL(nf_register_afinfo); 43EXPORT_SYMBOL_GPL(nf_register_afinfo);
44 44
45void nf_unregister_afinfo(struct nf_afinfo *afinfo) 45void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
46{ 46{
47 mutex_lock(&afinfo_mutex); 47 mutex_lock(&afinfo_mutex);
48 rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); 48 rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
@@ -51,28 +51,23 @@ void nf_unregister_afinfo(struct nf_afinfo *afinfo)
51} 51}
52EXPORT_SYMBOL_GPL(nf_unregister_afinfo); 52EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
53 53
54/* In this code, we can be waiting indefinitely for userspace to
55 * service a packet if a hook returns NF_QUEUE. We could keep a count
56 * of skbuffs queued for userspace, and not deregister a hook unless
57 * this is zero, but that sucks. Now, we simply check when the
58 * packets come back: if the hook is gone, the packet is discarded. */
59struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly; 54struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
60EXPORT_SYMBOL(nf_hooks); 55EXPORT_SYMBOL(nf_hooks);
61static DEFINE_MUTEX(nf_hook_mutex); 56static DEFINE_MUTEX(nf_hook_mutex);
62 57
63int nf_register_hook(struct nf_hook_ops *reg) 58int nf_register_hook(struct nf_hook_ops *reg)
64{ 59{
65 struct list_head *i; 60 struct nf_hook_ops *elem;
66 int err; 61 int err;
67 62
68 err = mutex_lock_interruptible(&nf_hook_mutex); 63 err = mutex_lock_interruptible(&nf_hook_mutex);
69 if (err < 0) 64 if (err < 0)
70 return err; 65 return err;
71 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { 66 list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
72 if (reg->priority < ((struct nf_hook_ops *)i)->priority) 67 if (reg->priority < elem->priority)
73 break; 68 break;
74 } 69 }
75 list_add_rcu(&reg->list, i->prev); 70 list_add_rcu(&reg->list, elem->list.prev);
76 mutex_unlock(&nf_hook_mutex); 71 mutex_unlock(&nf_hook_mutex);
77 return 0; 72 return 0;
78} 73}
@@ -183,8 +178,7 @@ next_hook:
183 } else if (verdict == NF_DROP) { 178 } else if (verdict == NF_DROP) {
184 kfree_skb(skb); 179 kfree_skb(skb);
185 ret = -EPERM; 180 ret = -EPERM;
186 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
187 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
188 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 182 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
189 verdict >> NF_VERDICT_BITS)) 183 verdict >> NF_VERDICT_BITS))
190 goto next_hook; 184 goto next_hook;
@@ -217,22 +211,6 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
217} 211}
218EXPORT_SYMBOL(skb_make_writable); 212EXPORT_SYMBOL(skb_make_writable);
219 213
220void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
221 __be32 from, __be32 to, int pseudohdr)
222{
223 __be32 diff[] = { ~from, to };
224 if (skb->ip_summed != CHECKSUM_PARTIAL) {
225 *sum = csum_fold(csum_partial(diff, sizeof(diff),
226 ~csum_unfold(*sum)));
227 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
228 skb->csum = ~csum_partial(diff, sizeof(diff),
229 ~skb->csum);
230 } else if (pseudohdr)
231 *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
232 csum_unfold(*sum)));
233}
234EXPORT_SYMBOL(nf_proto_csum_replace4);
235
236#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 214#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
237/* This does not belong here, but locally generated errors need it if connection 215/* This does not belong here, but locally generated errors need it if connection
238 tracking in use: without this, connection may not be in hash table, and hence 216 tracking in use: without this, connection may not be in hash table, and hence
@@ -294,3 +272,12 @@ void __init netfilter_init(void)
294 if (netfilter_log_init() < 0) 272 if (netfilter_log_init() < 0)
295 panic("cannot initialize nf_log"); 273 panic("cannot initialize nf_log");
296} 274}
275
276#ifdef CONFIG_SYSCTL
277struct ctl_path nf_net_netfilter_sysctl_path[] = {
278 { .procname = "net", .ctl_name = CTL_NET, },
279 { .procname = "netfilter", .ctl_name = NET_NETFILTER, },
280 { }
281};
282EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
283#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4d6171bc0829..327e847d2702 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -40,7 +40,7 @@
40 40
41#define NF_CONNTRACK_VERSION "0.5.0" 41#define NF_CONNTRACK_VERSION "0.5.0"
42 42
43DEFINE_RWLOCK(nf_conntrack_lock); 43DEFINE_SPINLOCK(nf_conntrack_lock);
44EXPORT_SYMBOL_GPL(nf_conntrack_lock); 44EXPORT_SYMBOL_GPL(nf_conntrack_lock);
45 45
46/* nf_conntrack_standalone needs this */ 46/* nf_conntrack_standalone needs this */
@@ -73,15 +73,19 @@ static unsigned int nf_conntrack_hash_rnd;
73static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 73static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
74 unsigned int size, unsigned int rnd) 74 unsigned int size, unsigned int rnd)
75{ 75{
76 unsigned int a, b; 76 unsigned int n;
77 77 u_int32_t h;
78 a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all), 78
79 (tuple->src.l3num << 16) | tuple->dst.protonum); 79 /* The direction must be ignored, so we hash everything up to the
80 b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), 80 * destination ports (which is a multiple of 4) and treat the last
81 ((__force __u16)tuple->src.u.all << 16) | 81 * three bytes manually.
82 (__force __u16)tuple->dst.u.all); 82 */
83 83 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
84 return jhash_2words(a, b, rnd) % size; 84 h = jhash2((u32 *)tuple, n,
85 rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
86 tuple->dst.protonum));
87
88 return ((u64)h * size) >> 32;
85} 89}
86 90
87static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) 91static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
@@ -166,8 +170,8 @@ static void
166clean_from_lists(struct nf_conn *ct) 170clean_from_lists(struct nf_conn *ct)
167{ 171{
168 pr_debug("clean_from_lists(%p)\n", ct); 172 pr_debug("clean_from_lists(%p)\n", ct);
169 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 173 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
170 hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); 174 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
171 175
172 /* Destroy all pending expectations */ 176 /* Destroy all pending expectations */
173 nf_ct_remove_expectations(ct); 177 nf_ct_remove_expectations(ct);
@@ -199,7 +203,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
199 203
200 rcu_read_unlock(); 204 rcu_read_unlock();
201 205
202 write_lock_bh(&nf_conntrack_lock); 206 spin_lock_bh(&nf_conntrack_lock);
203 /* Expectations will have been removed in clean_from_lists, 207 /* Expectations will have been removed in clean_from_lists,
204 * except TFTP can create an expectation on the first packet, 208 * except TFTP can create an expectation on the first packet,
205 * before connection is in the list, so we need to clean here, 209 * before connection is in the list, so we need to clean here,
@@ -213,7 +217,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
213 } 217 }
214 218
215 NF_CT_STAT_INC(delete); 219 NF_CT_STAT_INC(delete);
216 write_unlock_bh(&nf_conntrack_lock); 220 spin_unlock_bh(&nf_conntrack_lock);
217 221
218 if (ct->master) 222 if (ct->master)
219 nf_ct_put(ct->master); 223 nf_ct_put(ct->master);
@@ -236,26 +240,24 @@ static void death_by_timeout(unsigned long ul_conntrack)
236 rcu_read_unlock(); 240 rcu_read_unlock();
237 } 241 }
238 242
239 write_lock_bh(&nf_conntrack_lock); 243 spin_lock_bh(&nf_conntrack_lock);
240 /* Inside lock so preempt is disabled on module removal path. 244 /* Inside lock so preempt is disabled on module removal path.
241 * Otherwise we can get spurious warnings. */ 245 * Otherwise we can get spurious warnings. */
242 NF_CT_STAT_INC(delete_list); 246 NF_CT_STAT_INC(delete_list);
243 clean_from_lists(ct); 247 clean_from_lists(ct);
244 write_unlock_bh(&nf_conntrack_lock); 248 spin_unlock_bh(&nf_conntrack_lock);
245 nf_ct_put(ct); 249 nf_ct_put(ct);
246} 250}
247 251
248struct nf_conntrack_tuple_hash * 252struct nf_conntrack_tuple_hash *
249__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, 253__nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
250 const struct nf_conn *ignored_conntrack)
251{ 254{
252 struct nf_conntrack_tuple_hash *h; 255 struct nf_conntrack_tuple_hash *h;
253 struct hlist_node *n; 256 struct hlist_node *n;
254 unsigned int hash = hash_conntrack(tuple); 257 unsigned int hash = hash_conntrack(tuple);
255 258
256 hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { 259 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
257 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && 260 if (nf_ct_tuple_equal(tuple, &h->tuple)) {
258 nf_ct_tuple_equal(tuple, &h->tuple)) {
259 NF_CT_STAT_INC(found); 261 NF_CT_STAT_INC(found);
260 return h; 262 return h;
261 } 263 }
@@ -271,12 +273,16 @@ struct nf_conntrack_tuple_hash *
271nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) 273nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
272{ 274{
273 struct nf_conntrack_tuple_hash *h; 275 struct nf_conntrack_tuple_hash *h;
276 struct nf_conn *ct;
274 277
275 read_lock_bh(&nf_conntrack_lock); 278 rcu_read_lock();
276 h = __nf_conntrack_find(tuple, NULL); 279 h = __nf_conntrack_find(tuple);
277 if (h) 280 if (h) {
278 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); 281 ct = nf_ct_tuplehash_to_ctrack(h);
279 read_unlock_bh(&nf_conntrack_lock); 282 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
283 h = NULL;
284 }
285 rcu_read_unlock();
280 286
281 return h; 287 return h;
282} 288}
@@ -286,10 +292,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
286 unsigned int hash, 292 unsigned int hash,
287 unsigned int repl_hash) 293 unsigned int repl_hash)
288{ 294{
289 hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 295 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
290 &nf_conntrack_hash[hash]); 296 &nf_conntrack_hash[hash]);
291 hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, 297 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
292 &nf_conntrack_hash[repl_hash]); 298 &nf_conntrack_hash[repl_hash]);
293} 299}
294 300
295void nf_conntrack_hash_insert(struct nf_conn *ct) 301void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -299,9 +305,9 @@ void nf_conntrack_hash_insert(struct nf_conn *ct)
299 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 305 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
300 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 306 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
301 307
302 write_lock_bh(&nf_conntrack_lock); 308 spin_lock_bh(&nf_conntrack_lock);
303 __nf_conntrack_hash_insert(ct, hash, repl_hash); 309 __nf_conntrack_hash_insert(ct, hash, repl_hash);
304 write_unlock_bh(&nf_conntrack_lock); 310 spin_unlock_bh(&nf_conntrack_lock);
305} 311}
306EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); 312EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
307 313
@@ -338,7 +344,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
338 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 344 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
339 pr_debug("Confirming conntrack %p\n", ct); 345 pr_debug("Confirming conntrack %p\n", ct);
340 346
341 write_lock_bh(&nf_conntrack_lock); 347 spin_lock_bh(&nf_conntrack_lock);
342 348
343 /* See if there's one in the list already, including reverse: 349 /* See if there's one in the list already, including reverse:
344 NAT could have grabbed it without realizing, since we're 350 NAT could have grabbed it without realizing, since we're
@@ -364,7 +370,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
364 atomic_inc(&ct->ct_general.use); 370 atomic_inc(&ct->ct_general.use);
365 set_bit(IPS_CONFIRMED_BIT, &ct->status); 371 set_bit(IPS_CONFIRMED_BIT, &ct->status);
366 NF_CT_STAT_INC(insert); 372 NF_CT_STAT_INC(insert);
367 write_unlock_bh(&nf_conntrack_lock); 373 spin_unlock_bh(&nf_conntrack_lock);
368 help = nfct_help(ct); 374 help = nfct_help(ct);
369 if (help && help->helper) 375 if (help && help->helper)
370 nf_conntrack_event_cache(IPCT_HELPER, skb); 376 nf_conntrack_event_cache(IPCT_HELPER, skb);
@@ -379,7 +385,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
379 385
380out: 386out:
381 NF_CT_STAT_INC(insert_failed); 387 NF_CT_STAT_INC(insert_failed);
382 write_unlock_bh(&nf_conntrack_lock); 388 spin_unlock_bh(&nf_conntrack_lock);
383 return NF_DROP; 389 return NF_DROP;
384} 390}
385EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); 391EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
@@ -391,12 +397,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
391 const struct nf_conn *ignored_conntrack) 397 const struct nf_conn *ignored_conntrack)
392{ 398{
393 struct nf_conntrack_tuple_hash *h; 399 struct nf_conntrack_tuple_hash *h;
400 struct hlist_node *n;
401 unsigned int hash = hash_conntrack(tuple);
394 402
395 read_lock_bh(&nf_conntrack_lock); 403 rcu_read_lock();
396 h = __nf_conntrack_find(tuple, ignored_conntrack); 404 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
397 read_unlock_bh(&nf_conntrack_lock); 405 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
406 nf_ct_tuple_equal(tuple, &h->tuple)) {
407 NF_CT_STAT_INC(found);
408 rcu_read_unlock();
409 return 1;
410 }
411 NF_CT_STAT_INC(searched);
412 }
413 rcu_read_unlock();
398 414
399 return h != NULL; 415 return 0;
400} 416}
401EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); 417EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
402 418
@@ -404,7 +420,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
404 420
405/* There's a small race here where we may free a just-assured 421/* There's a small race here where we may free a just-assured
406 connection. Too bad: we're in trouble anyway. */ 422 connection. Too bad: we're in trouble anyway. */
407static int early_drop(unsigned int hash) 423static noinline int early_drop(unsigned int hash)
408{ 424{
409 /* Use oldest entry, which is roughly LRU */ 425 /* Use oldest entry, which is roughly LRU */
410 struct nf_conntrack_tuple_hash *h; 426 struct nf_conntrack_tuple_hash *h;
@@ -413,21 +429,23 @@ static int early_drop(unsigned int hash)
413 unsigned int i, cnt = 0; 429 unsigned int i, cnt = 0;
414 int dropped = 0; 430 int dropped = 0;
415 431
416 read_lock_bh(&nf_conntrack_lock); 432 rcu_read_lock();
417 for (i = 0; i < nf_conntrack_htable_size; i++) { 433 for (i = 0; i < nf_conntrack_htable_size; i++) {
418 hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { 434 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
435 hnode) {
419 tmp = nf_ct_tuplehash_to_ctrack(h); 436 tmp = nf_ct_tuplehash_to_ctrack(h);
420 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) 437 if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
421 ct = tmp; 438 ct = tmp;
422 cnt++; 439 cnt++;
423 } 440 }
441
442 if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
443 ct = NULL;
424 if (ct || cnt >= NF_CT_EVICTION_RANGE) 444 if (ct || cnt >= NF_CT_EVICTION_RANGE)
425 break; 445 break;
426 hash = (hash + 1) % nf_conntrack_htable_size; 446 hash = (hash + 1) % nf_conntrack_htable_size;
427 } 447 }
428 if (ct) 448 rcu_read_unlock();
429 atomic_inc(&ct->ct_general.use);
430 read_unlock_bh(&nf_conntrack_lock);
431 449
432 if (!ct) 450 if (!ct)
433 return dropped; 451 return dropped;
@@ -444,7 +462,7 @@ static int early_drop(unsigned int hash)
444struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, 462struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
445 const struct nf_conntrack_tuple *repl) 463 const struct nf_conntrack_tuple *repl)
446{ 464{
447 struct nf_conn *conntrack = NULL; 465 struct nf_conn *ct = NULL;
448 466
449 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 467 if (unlikely(!nf_conntrack_hash_rnd_initted)) {
450 get_random_bytes(&nf_conntrack_hash_rnd, 4); 468 get_random_bytes(&nf_conntrack_hash_rnd, 4);
@@ -454,8 +472,8 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
454 /* We don't want any race condition at early drop stage */ 472 /* We don't want any race condition at early drop stage */
455 atomic_inc(&nf_conntrack_count); 473 atomic_inc(&nf_conntrack_count);
456 474
457 if (nf_conntrack_max 475 if (nf_conntrack_max &&
458 && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { 476 unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
459 unsigned int hash = hash_conntrack(orig); 477 unsigned int hash = hash_conntrack(orig);
460 if (!early_drop(hash)) { 478 if (!early_drop(hash)) {
461 atomic_dec(&nf_conntrack_count); 479 atomic_dec(&nf_conntrack_count);
@@ -467,30 +485,37 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
467 } 485 }
468 } 486 }
469 487
470 conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); 488 ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
471 if (conntrack == NULL) { 489 if (ct == NULL) {
472 pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); 490 pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
473 atomic_dec(&nf_conntrack_count); 491 atomic_dec(&nf_conntrack_count);
474 return ERR_PTR(-ENOMEM); 492 return ERR_PTR(-ENOMEM);
475 } 493 }
476 494
477 atomic_set(&conntrack->ct_general.use, 1); 495 atomic_set(&ct->ct_general.use, 1);
478 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 496 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
479 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 497 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
480 /* Don't set timer yet: wait for confirmation */ 498 /* Don't set timer yet: wait for confirmation */
481 setup_timer(&conntrack->timeout, death_by_timeout, 499 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
482 (unsigned long)conntrack); 500 INIT_RCU_HEAD(&ct->rcu);
483 501
484 return conntrack; 502 return ct;
485} 503}
486EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 504EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
487 505
488void nf_conntrack_free(struct nf_conn *conntrack) 506static void nf_conntrack_free_rcu(struct rcu_head *head)
489{ 507{
490 nf_ct_ext_free(conntrack); 508 struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
491 kmem_cache_free(nf_conntrack_cachep, conntrack); 509
510 nf_ct_ext_free(ct);
511 kmem_cache_free(nf_conntrack_cachep, ct);
492 atomic_dec(&nf_conntrack_count); 512 atomic_dec(&nf_conntrack_count);
493} 513}
514
515void nf_conntrack_free(struct nf_conn *ct)
516{
517 call_rcu(&ct->rcu, nf_conntrack_free_rcu);
518}
494EXPORT_SYMBOL_GPL(nf_conntrack_free); 519EXPORT_SYMBOL_GPL(nf_conntrack_free);
495 520
496/* Allocate a new conntrack: we return -ENOMEM if classification 521/* Allocate a new conntrack: we return -ENOMEM if classification
@@ -502,7 +527,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
502 struct sk_buff *skb, 527 struct sk_buff *skb,
503 unsigned int dataoff) 528 unsigned int dataoff)
504{ 529{
505 struct nf_conn *conntrack; 530 struct nf_conn *ct;
506 struct nf_conn_help *help; 531 struct nf_conn_help *help;
507 struct nf_conntrack_tuple repl_tuple; 532 struct nf_conntrack_tuple repl_tuple;
508 struct nf_conntrack_expect *exp; 533 struct nf_conntrack_expect *exp;
@@ -512,46 +537,46 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
512 return NULL; 537 return NULL;
513 } 538 }
514 539
515 conntrack = nf_conntrack_alloc(tuple, &repl_tuple); 540 ct = nf_conntrack_alloc(tuple, &repl_tuple);
516 if (conntrack == NULL || IS_ERR(conntrack)) { 541 if (ct == NULL || IS_ERR(ct)) {
517 pr_debug("Can't allocate conntrack.\n"); 542 pr_debug("Can't allocate conntrack.\n");
518 return (struct nf_conntrack_tuple_hash *)conntrack; 543 return (struct nf_conntrack_tuple_hash *)ct;
519 } 544 }
520 545
521 if (!l4proto->new(conntrack, skb, dataoff)) { 546 if (!l4proto->new(ct, skb, dataoff)) {
522 nf_conntrack_free(conntrack); 547 nf_conntrack_free(ct);
523 pr_debug("init conntrack: can't track with proto module\n"); 548 pr_debug("init conntrack: can't track with proto module\n");
524 return NULL; 549 return NULL;
525 } 550 }
526 551
527 write_lock_bh(&nf_conntrack_lock); 552 spin_lock_bh(&nf_conntrack_lock);
528 exp = nf_ct_find_expectation(tuple); 553 exp = nf_ct_find_expectation(tuple);
529 if (exp) { 554 if (exp) {
530 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", 555 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
531 conntrack, exp); 556 ct, exp);
532 /* Welcome, Mr. Bond. We've been expecting you... */ 557 /* Welcome, Mr. Bond. We've been expecting you... */
533 __set_bit(IPS_EXPECTED_BIT, &conntrack->status); 558 __set_bit(IPS_EXPECTED_BIT, &ct->status);
534 conntrack->master = exp->master; 559 ct->master = exp->master;
535 if (exp->helper) { 560 if (exp->helper) {
536 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); 561 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
537 if (help) 562 if (help)
538 rcu_assign_pointer(help->helper, exp->helper); 563 rcu_assign_pointer(help->helper, exp->helper);
539 } 564 }
540 565
541#ifdef CONFIG_NF_CONNTRACK_MARK 566#ifdef CONFIG_NF_CONNTRACK_MARK
542 conntrack->mark = exp->master->mark; 567 ct->mark = exp->master->mark;
543#endif 568#endif
544#ifdef CONFIG_NF_CONNTRACK_SECMARK 569#ifdef CONFIG_NF_CONNTRACK_SECMARK
545 conntrack->secmark = exp->master->secmark; 570 ct->secmark = exp->master->secmark;
546#endif 571#endif
547 nf_conntrack_get(&conntrack->master->ct_general); 572 nf_conntrack_get(&ct->master->ct_general);
548 NF_CT_STAT_INC(expect_new); 573 NF_CT_STAT_INC(expect_new);
549 } else { 574 } else {
550 struct nf_conntrack_helper *helper; 575 struct nf_conntrack_helper *helper;
551 576
552 helper = __nf_ct_helper_find(&repl_tuple); 577 helper = __nf_ct_helper_find(&repl_tuple);
553 if (helper) { 578 if (helper) {
554 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); 579 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
555 if (help) 580 if (help)
556 rcu_assign_pointer(help->helper, helper); 581 rcu_assign_pointer(help->helper, helper);
557 } 582 }
@@ -559,18 +584,17 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
559 } 584 }
560 585
561 /* Overload tuple linked list to put us in unconfirmed list. */ 586 /* Overload tuple linked list to put us in unconfirmed list. */
562 hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 587 hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
563 &unconfirmed);
564 588
565 write_unlock_bh(&nf_conntrack_lock); 589 spin_unlock_bh(&nf_conntrack_lock);
566 590
567 if (exp) { 591 if (exp) {
568 if (exp->expectfn) 592 if (exp->expectfn)
569 exp->expectfn(conntrack, exp); 593 exp->expectfn(ct, exp);
570 nf_ct_expect_put(exp); 594 nf_ct_expect_put(exp);
571 } 595 }
572 596
573 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; 597 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
574} 598}
575 599
576/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ 600/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -729,7 +753,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
729 struct nf_conn_help *help = nfct_help(ct); 753 struct nf_conn_help *help = nfct_help(ct);
730 struct nf_conntrack_helper *helper; 754 struct nf_conntrack_helper *helper;
731 755
732 write_lock_bh(&nf_conntrack_lock);
733 /* Should be unconfirmed, so not in hash table yet */ 756 /* Should be unconfirmed, so not in hash table yet */
734 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 757 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
735 758
@@ -738,8 +761,9 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
738 761
739 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 762 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
740 if (ct->master || (help && help->expecting != 0)) 763 if (ct->master || (help && help->expecting != 0))
741 goto out; 764 return;
742 765
766 rcu_read_lock();
743 helper = __nf_ct_helper_find(newreply); 767 helper = __nf_ct_helper_find(newreply);
744 if (helper == NULL) { 768 if (helper == NULL) {
745 if (help) 769 if (help)
@@ -757,7 +781,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
757 781
758 rcu_assign_pointer(help->helper, helper); 782 rcu_assign_pointer(help->helper, helper);
759out: 783out:
760 write_unlock_bh(&nf_conntrack_lock); 784 rcu_read_unlock();
761} 785}
762EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); 786EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
763 787
@@ -773,13 +797,11 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
773 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); 797 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
774 NF_CT_ASSERT(skb); 798 NF_CT_ASSERT(skb);
775 799
776 write_lock_bh(&nf_conntrack_lock); 800 spin_lock_bh(&nf_conntrack_lock);
777 801
778 /* Only update if this is not a fixed timeout */ 802 /* Only update if this is not a fixed timeout */
779 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { 803 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
780 write_unlock_bh(&nf_conntrack_lock); 804 goto acct;
781 return;
782 }
783 805
784 /* If not in hash table, timer will not be active yet */ 806 /* If not in hash table, timer will not be active yet */
785 if (!nf_ct_is_confirmed(ct)) { 807 if (!nf_ct_is_confirmed(ct)) {
@@ -799,6 +821,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
799 } 821 }
800 } 822 }
801 823
824acct:
802#ifdef CONFIG_NF_CT_ACCT 825#ifdef CONFIG_NF_CT_ACCT
803 if (do_acct) { 826 if (do_acct) {
804 ct->counters[CTINFO2DIR(ctinfo)].packets++; 827 ct->counters[CTINFO2DIR(ctinfo)].packets++;
@@ -811,7 +834,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
811 } 834 }
812#endif 835#endif
813 836
814 write_unlock_bh(&nf_conntrack_lock); 837 spin_unlock_bh(&nf_conntrack_lock);
815 838
816 /* must be unlocked when calling event cache */ 839 /* must be unlocked when calling event cache */
817 if (event) 840 if (event)
@@ -831,10 +854,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
831int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 854int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
832 const struct nf_conntrack_tuple *tuple) 855 const struct nf_conntrack_tuple *tuple)
833{ 856{
834 NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), 857 NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
835 &tuple->src.u.tcp.port); 858 NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
836 NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
837 &tuple->dst.u.tcp.port);
838 return 0; 859 return 0;
839 860
840nla_put_failure: 861nla_put_failure:
@@ -854,8 +875,8 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
854 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 875 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
855 return -EINVAL; 876 return -EINVAL;
856 877
857 t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]); 878 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
858 t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]); 879 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
859 880
860 return 0; 881 return 0;
861} 882}
@@ -863,7 +884,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
863#endif 884#endif
864 885
865/* Used by ipt_REJECT and ip6t_REJECT. */ 886/* Used by ipt_REJECT and ip6t_REJECT. */
866void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 887static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
867{ 888{
868 struct nf_conn *ct; 889 struct nf_conn *ct;
869 enum ip_conntrack_info ctinfo; 890 enum ip_conntrack_info ctinfo;
@@ -880,15 +901,6 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
880 nskb->nfctinfo = ctinfo; 901 nskb->nfctinfo = ctinfo;
881 nf_conntrack_get(nskb->nfct); 902 nf_conntrack_get(nskb->nfct);
882} 903}
883EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
884
885static inline int
886do_iter(const struct nf_conntrack_tuple_hash *i,
887 int (*iter)(struct nf_conn *i, void *data),
888 void *data)
889{
890 return iter(nf_ct_tuplehash_to_ctrack(i), data);
891}
892 904
893/* Bring out ya dead! */ 905/* Bring out ya dead! */
894static struct nf_conn * 906static struct nf_conn *
@@ -899,7 +911,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
899 struct nf_conn *ct; 911 struct nf_conn *ct;
900 struct hlist_node *n; 912 struct hlist_node *n;
901 913
902 write_lock_bh(&nf_conntrack_lock); 914 spin_lock_bh(&nf_conntrack_lock);
903 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { 915 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
904 hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { 916 hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
905 ct = nf_ct_tuplehash_to_ctrack(h); 917 ct = nf_ct_tuplehash_to_ctrack(h);
@@ -912,11 +924,11 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
912 if (iter(ct, data)) 924 if (iter(ct, data))
913 set_bit(IPS_DYING_BIT, &ct->status); 925 set_bit(IPS_DYING_BIT, &ct->status);
914 } 926 }
915 write_unlock_bh(&nf_conntrack_lock); 927 spin_unlock_bh(&nf_conntrack_lock);
916 return NULL; 928 return NULL;
917found: 929found:
918 atomic_inc(&ct->ct_general.use); 930 atomic_inc(&ct->ct_general.use);
919 write_unlock_bh(&nf_conntrack_lock); 931 spin_unlock_bh(&nf_conntrack_lock);
920 return ct; 932 return ct;
921} 933}
922 934
@@ -942,7 +954,7 @@ static int kill_all(struct nf_conn *i, void *data)
942 return 1; 954 return 1;
943} 955}
944 956
945void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size) 957void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
946{ 958{
947 if (vmalloced) 959 if (vmalloced)
948 vfree(hash); 960 vfree(hash);
@@ -991,7 +1003,7 @@ void nf_conntrack_cleanup(void)
991 nf_conntrack_expect_fini(); 1003 nf_conntrack_expect_fini();
992} 1004}
993 1005
994struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced) 1006struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
995{ 1007{
996 struct hlist_head *hash; 1008 struct hlist_head *hash;
997 unsigned int size, i; 1009 unsigned int size, i;
@@ -999,7 +1011,7 @@ struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
999 *vmalloced = 0; 1011 *vmalloced = 0;
1000 1012
1001 size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head)); 1013 size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
1002 hash = (void*)__get_free_pages(GFP_KERNEL, 1014 hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
1003 get_order(sizeof(struct hlist_head) 1015 get_order(sizeof(struct hlist_head)
1004 * size)); 1016 * size));
1005 if (!hash) { 1017 if (!hash) {
@@ -1016,10 +1028,10 @@ struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
1016} 1028}
1017EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); 1029EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1018 1030
1019int set_hashsize(const char *val, struct kernel_param *kp) 1031int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1020{ 1032{
1021 int i, bucket, hashsize, vmalloced; 1033 int i, bucket, vmalloced, old_vmalloced;
1022 int old_vmalloced, old_size; 1034 unsigned int hashsize, old_size;
1023 int rnd; 1035 int rnd;
1024 struct hlist_head *hash, *old_hash; 1036 struct hlist_head *hash, *old_hash;
1025 struct nf_conntrack_tuple_hash *h; 1037 struct nf_conntrack_tuple_hash *h;
@@ -1028,7 +1040,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
1028 if (!nf_conntrack_htable_size) 1040 if (!nf_conntrack_htable_size)
1029 return param_set_uint(val, kp); 1041 return param_set_uint(val, kp);
1030 1042
1031 hashsize = simple_strtol(val, NULL, 0); 1043 hashsize = simple_strtoul(val, NULL, 0);
1032 if (!hashsize) 1044 if (!hashsize)
1033 return -EINVAL; 1045 return -EINVAL;
1034 1046
@@ -1040,12 +1052,17 @@ int set_hashsize(const char *val, struct kernel_param *kp)
1040 * use a newrandom seed */ 1052 * use a newrandom seed */
1041 get_random_bytes(&rnd, 4); 1053 get_random_bytes(&rnd, 4);
1042 1054
1043 write_lock_bh(&nf_conntrack_lock); 1055 /* Lookups in the old hash might happen in parallel, which means we
1056 * might get false negatives during connection lookup. New connections
1057 * created because of a false negative won't make it into the hash
1058 * though since that required taking the lock.
1059 */
1060 spin_lock_bh(&nf_conntrack_lock);
1044 for (i = 0; i < nf_conntrack_htable_size; i++) { 1061 for (i = 0; i < nf_conntrack_htable_size; i++) {
1045 while (!hlist_empty(&nf_conntrack_hash[i])) { 1062 while (!hlist_empty(&nf_conntrack_hash[i])) {
1046 h = hlist_entry(nf_conntrack_hash[i].first, 1063 h = hlist_entry(nf_conntrack_hash[i].first,
1047 struct nf_conntrack_tuple_hash, hnode); 1064 struct nf_conntrack_tuple_hash, hnode);
1048 hlist_del(&h->hnode); 1065 hlist_del_rcu(&h->hnode);
1049 bucket = __hash_conntrack(&h->tuple, hashsize, rnd); 1066 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1050 hlist_add_head(&h->hnode, &hash[bucket]); 1067 hlist_add_head(&h->hnode, &hash[bucket]);
1051 } 1068 }
@@ -1058,13 +1075,14 @@ int set_hashsize(const char *val, struct kernel_param *kp)
1058 nf_conntrack_vmalloc = vmalloced; 1075 nf_conntrack_vmalloc = vmalloced;
1059 nf_conntrack_hash = hash; 1076 nf_conntrack_hash = hash;
1060 nf_conntrack_hash_rnd = rnd; 1077 nf_conntrack_hash_rnd = rnd;
1061 write_unlock_bh(&nf_conntrack_lock); 1078 spin_unlock_bh(&nf_conntrack_lock);
1062 1079
1063 nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); 1080 nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
1064 return 0; 1081 return 0;
1065} 1082}
1083EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1066 1084
1067module_param_call(hashsize, set_hashsize, param_get_uint, 1085module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1068 &nf_conntrack_htable_size, 0600); 1086 &nf_conntrack_htable_size, 0600);
1069 1087
1070int __init nf_conntrack_init(void) 1088int __init nf_conntrack_init(void)
@@ -1123,7 +1141,7 @@ int __init nf_conntrack_init(void)
1123 goto out_fini_expect; 1141 goto out_fini_expect;
1124 1142
1125 /* For use by REJECT target */ 1143 /* For use by REJECT target */
1126 rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); 1144 rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
1127 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); 1145 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
1128 1146
1129 /* Set up fake conntrack: 1147 /* Set up fake conntrack:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 175c8d1a1992..e06bf0028bb1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -50,7 +50,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
50 NF_CT_ASSERT(master_help); 50 NF_CT_ASSERT(master_help);
51 NF_CT_ASSERT(!timer_pending(&exp->timeout)); 51 NF_CT_ASSERT(!timer_pending(&exp->timeout));
52 52
53 hlist_del(&exp->hnode); 53 hlist_del_rcu(&exp->hnode);
54 nf_ct_expect_count--; 54 nf_ct_expect_count--;
55 55
56 hlist_del(&exp->lnode); 56 hlist_del(&exp->lnode);
@@ -65,23 +65,25 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
65{ 65{
66 struct nf_conntrack_expect *exp = (void *)ul_expect; 66 struct nf_conntrack_expect *exp = (void *)ul_expect;
67 67
68 write_lock_bh(&nf_conntrack_lock); 68 spin_lock_bh(&nf_conntrack_lock);
69 nf_ct_unlink_expect(exp); 69 nf_ct_unlink_expect(exp);
70 write_unlock_bh(&nf_conntrack_lock); 70 spin_unlock_bh(&nf_conntrack_lock);
71 nf_ct_expect_put(exp); 71 nf_ct_expect_put(exp);
72} 72}
73 73
74static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) 74static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75{ 75{
76 unsigned int hash;
77
76 if (unlikely(!nf_ct_expect_hash_rnd_initted)) { 78 if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
77 get_random_bytes(&nf_ct_expect_hash_rnd, 4); 79 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
78 nf_ct_expect_hash_rnd_initted = 1; 80 nf_ct_expect_hash_rnd_initted = 1;
79 } 81 }
80 82
81 return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), 83 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
82 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | 84 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
83 (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) % 85 (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
84 nf_ct_expect_hsize; 86 return ((u64)hash * nf_ct_expect_hsize) >> 32;
85} 87}
86 88
87struct nf_conntrack_expect * 89struct nf_conntrack_expect *
@@ -95,7 +97,7 @@ __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
95 return NULL; 97 return NULL;
96 98
97 h = nf_ct_expect_dst_hash(tuple); 99 h = nf_ct_expect_dst_hash(tuple);
98 hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { 100 hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
99 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) 101 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
100 return i; 102 return i;
101 } 103 }
@@ -109,11 +111,11 @@ nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
109{ 111{
110 struct nf_conntrack_expect *i; 112 struct nf_conntrack_expect *i;
111 113
112 read_lock_bh(&nf_conntrack_lock); 114 rcu_read_lock();
113 i = __nf_ct_expect_find(tuple); 115 i = __nf_ct_expect_find(tuple);
114 if (i) 116 if (i && !atomic_inc_not_zero(&i->use))
115 atomic_inc(&i->use); 117 i = NULL;
116 read_unlock_bh(&nf_conntrack_lock); 118 rcu_read_unlock();
117 119
118 return i; 120 return i;
119} 121}
@@ -199,12 +201,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
199/* Generally a bad idea to call this: could have matched already. */ 201/* Generally a bad idea to call this: could have matched already. */
200void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) 202void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
201{ 203{
202 write_lock_bh(&nf_conntrack_lock); 204 spin_lock_bh(&nf_conntrack_lock);
203 if (del_timer(&exp->timeout)) { 205 if (del_timer(&exp->timeout)) {
204 nf_ct_unlink_expect(exp); 206 nf_ct_unlink_expect(exp);
205 nf_ct_expect_put(exp); 207 nf_ct_expect_put(exp);
206 } 208 }
207 write_unlock_bh(&nf_conntrack_lock); 209 spin_unlock_bh(&nf_conntrack_lock);
208} 210}
209EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); 211EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
210 212
@@ -221,13 +223,14 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
221 223
222 new->master = me; 224 new->master = me;
223 atomic_set(&new->use, 1); 225 atomic_set(&new->use, 1);
226 INIT_RCU_HEAD(&new->rcu);
224 return new; 227 return new;
225} 228}
226EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); 229EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
227 230
228void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, 231void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
229 union nf_conntrack_address *saddr, 232 union nf_inet_addr *saddr,
230 union nf_conntrack_address *daddr, 233 union nf_inet_addr *daddr,
231 u_int8_t proto, __be16 *src, __be16 *dst) 234 u_int8_t proto, __be16 *src, __be16 *dst)
232{ 235{
233 int len; 236 int len;
@@ -276,10 +279,18 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
276} 279}
277EXPORT_SYMBOL_GPL(nf_ct_expect_init); 280EXPORT_SYMBOL_GPL(nf_ct_expect_init);
278 281
282static void nf_ct_expect_free_rcu(struct rcu_head *head)
283{
284 struct nf_conntrack_expect *exp;
285
286 exp = container_of(head, struct nf_conntrack_expect, rcu);
287 kmem_cache_free(nf_ct_expect_cachep, exp);
288}
289
279void nf_ct_expect_put(struct nf_conntrack_expect *exp) 290void nf_ct_expect_put(struct nf_conntrack_expect *exp)
280{ 291{
281 if (atomic_dec_and_test(&exp->use)) 292 if (atomic_dec_and_test(&exp->use))
282 kmem_cache_free(nf_ct_expect_cachep, exp); 293 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
283} 294}
284EXPORT_SYMBOL_GPL(nf_ct_expect_put); 295EXPORT_SYMBOL_GPL(nf_ct_expect_put);
285 296
@@ -293,7 +304,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
293 hlist_add_head(&exp->lnode, &master_help->expectations); 304 hlist_add_head(&exp->lnode, &master_help->expectations);
294 master_help->expecting++; 305 master_help->expecting++;
295 306
296 hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); 307 hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
297 nf_ct_expect_count++; 308 nf_ct_expect_count++;
298 309
299 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 310 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
@@ -344,7 +355,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
344 355
345 NF_CT_ASSERT(master_help); 356 NF_CT_ASSERT(master_help);
346 357
347 write_lock_bh(&nf_conntrack_lock); 358 spin_lock_bh(&nf_conntrack_lock);
348 if (!master_help->helper) { 359 if (!master_help->helper) {
349 ret = -ESHUTDOWN; 360 ret = -ESHUTDOWN;
350 goto out; 361 goto out;
@@ -379,7 +390,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
379 nf_ct_expect_event(IPEXP_NEW, expect); 390 nf_ct_expect_event(IPEXP_NEW, expect);
380 ret = 0; 391 ret = 0;
381out: 392out:
382 write_unlock_bh(&nf_conntrack_lock); 393 spin_unlock_bh(&nf_conntrack_lock);
383 return ret; 394 return ret;
384} 395}
385EXPORT_SYMBOL_GPL(nf_ct_expect_related); 396EXPORT_SYMBOL_GPL(nf_ct_expect_related);
@@ -392,10 +403,12 @@ struct ct_expect_iter_state {
392static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 403static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
393{ 404{
394 struct ct_expect_iter_state *st = seq->private; 405 struct ct_expect_iter_state *st = seq->private;
406 struct hlist_node *n;
395 407
396 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 408 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
397 if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) 409 n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
398 return nf_ct_expect_hash[st->bucket].first; 410 if (n)
411 return n;
399 } 412 }
400 return NULL; 413 return NULL;
401} 414}
@@ -405,11 +418,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
405{ 418{
406 struct ct_expect_iter_state *st = seq->private; 419 struct ct_expect_iter_state *st = seq->private;
407 420
408 head = head->next; 421 head = rcu_dereference(head->next);
409 while (head == NULL) { 422 while (head == NULL) {
410 if (++st->bucket >= nf_ct_expect_hsize) 423 if (++st->bucket >= nf_ct_expect_hsize)
411 return NULL; 424 return NULL;
412 head = nf_ct_expect_hash[st->bucket].first; 425 head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
413 } 426 }
414 return head; 427 return head;
415} 428}
@@ -425,8 +438,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
425} 438}
426 439
427static void *exp_seq_start(struct seq_file *seq, loff_t *pos) 440static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
441 __acquires(RCU)
428{ 442{
429 read_lock_bh(&nf_conntrack_lock); 443 rcu_read_lock();
430 return ct_expect_get_idx(seq, *pos); 444 return ct_expect_get_idx(seq, *pos);
431} 445}
432 446
@@ -437,8 +451,9 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
437} 451}
438 452
439static void exp_seq_stop(struct seq_file *seq, void *v) 453static void exp_seq_stop(struct seq_file *seq, void *v)
454 __releases(RCU)
440{ 455{
441 read_unlock_bh(&nf_conntrack_lock); 456 rcu_read_unlock();
442} 457}
443 458
444static int exp_seq_show(struct seq_file *s, void *v) 459static int exp_seq_show(struct seq_file *s, void *v)
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index a1a65a1313b3..cf6ba6659a80 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -109,7 +109,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
109 rcu_read_lock(); 109 rcu_read_lock();
110 t = rcu_dereference(nf_ct_ext_types[i]); 110 t = rcu_dereference(nf_ct_ext_types[i]);
111 if (t && t->move) 111 if (t && t->move)
112 t->move(ct, ct->ext + ct->ext->offset[id]); 112 t->move(ct, ct->ext + ct->ext->offset[i]);
113 rcu_read_unlock(); 113 rcu_read_unlock();
114 } 114 }
115 kfree(ct->ext); 115 kfree(ct->ext);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6df259067f7e..6770baf2e845 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -358,7 +358,7 @@ static int help(struct sk_buff *skb,
358 unsigned int matchlen, matchoff; 358 unsigned int matchlen, matchoff;
359 struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; 359 struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
360 struct nf_conntrack_expect *exp; 360 struct nf_conntrack_expect *exp;
361 union nf_conntrack_address *daddr; 361 union nf_inet_addr *daddr;
362 struct nf_conntrack_man cmd = {}; 362 struct nf_conntrack_man cmd = {};
363 unsigned int i; 363 unsigned int i;
364 int found = 0, ends_in_nl; 364 int found = 0, ends_in_nl;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index a869403b2294..867882313e49 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -87,7 +87,7 @@ typedef struct field_t {
87 unsigned char ub; 87 unsigned char ub;
88 unsigned short attr; 88 unsigned short attr;
89 unsigned short offset; 89 unsigned short offset;
90 struct field_t *fields; 90 const struct field_t *fields;
91} field_t; 91} field_t;
92 92
93/* Bit Stream */ 93/* Bit Stream */
@@ -96,37 +96,37 @@ typedef struct {
96 unsigned char *beg; 96 unsigned char *beg;
97 unsigned char *end; 97 unsigned char *end;
98 unsigned char *cur; 98 unsigned char *cur;
99 unsigned bit; 99 unsigned int bit;
100} bitstr_t; 100} bitstr_t;
101 101
102/* Tool Functions */ 102/* Tool Functions */
103#define INC_BIT(bs) if((++bs->bit)>7){bs->cur++;bs->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if((bs->bit+=b)>7){bs->cur+=bs->bit>>3;bs->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if(bs->bit){bs->cur++;bs->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if(bs->cur+(n)>bs->end)return(H323_ERROR_BOUND) 106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned get_len(bitstr_t * bs); 107static unsigned int get_len(bitstr_t *bs);
108static unsigned get_bit(bitstr_t * bs); 108static unsigned int get_bit(bitstr_t *bs);
109static unsigned get_bits(bitstr_t * bs, unsigned b); 109static unsigned int get_bits(bitstr_t *bs, unsigned int b);
110static unsigned get_bitmap(bitstr_t * bs, unsigned b); 110static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
111static unsigned get_uint(bitstr_t * bs, int b); 111static unsigned int get_uint(bitstr_t *bs, int b);
112 112
113/* Decoder Functions */ 113/* Decoder Functions */
114static int decode_nul(bitstr_t * bs, field_t * f, char *base, int level); 114static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
115static int decode_bool(bitstr_t * bs, field_t * f, char *base, int level); 115static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
116static int decode_oid(bitstr_t * bs, field_t * f, char *base, int level); 116static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
117static int decode_int(bitstr_t * bs, field_t * f, char *base, int level); 117static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
118static int decode_enum(bitstr_t * bs, field_t * f, char *base, int level); 118static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
119static int decode_bitstr(bitstr_t * bs, field_t * f, char *base, int level); 119static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
120static int decode_numstr(bitstr_t * bs, field_t * f, char *base, int level); 120static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
121static int decode_octstr(bitstr_t * bs, field_t * f, char *base, int level); 121static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
122static int decode_bmpstr(bitstr_t * bs, field_t * f, char *base, int level); 122static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
123static int decode_seq(bitstr_t * bs, field_t * f, char *base, int level); 123static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
124static int decode_seqof(bitstr_t * bs, field_t * f, char *base, int level); 124static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
125static int decode_choice(bitstr_t * bs, field_t * f, char *base, int level); 125static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
126 126
127/* Decoder Functions Vector */ 127/* Decoder Functions Vector */
128typedef int (*decoder_t) (bitstr_t *, field_t *, char *, int); 128typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
129static decoder_t Decoders[] = { 129static const decoder_t Decoders[] = {
130 decode_nul, 130 decode_nul,
131 decode_bool, 131 decode_bool,
132 decode_oid, 132 decode_oid,
@@ -150,9 +150,9 @@ static decoder_t Decoders[] = {
150 * Functions 150 * Functions
151 ****************************************************************************/ 151 ****************************************************************************/
152/* Assume bs is aligned && v < 16384 */ 152/* Assume bs is aligned && v < 16384 */
153unsigned get_len(bitstr_t * bs) 153static unsigned int get_len(bitstr_t *bs)
154{ 154{
155 unsigned v; 155 unsigned int v;
156 156
157 v = *bs->cur++; 157 v = *bs->cur++;
158 158
@@ -166,9 +166,9 @@ unsigned get_len(bitstr_t * bs)
166} 166}
167 167
168/****************************************************************************/ 168/****************************************************************************/
169unsigned get_bit(bitstr_t * bs) 169static unsigned int get_bit(bitstr_t *bs)
170{ 170{
171 unsigned b = (*bs->cur) & (0x80 >> bs->bit); 171 unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
172 172
173 INC_BIT(bs); 173 INC_BIT(bs);
174 174
@@ -177,9 +177,9 @@ unsigned get_bit(bitstr_t * bs)
177 177
178/****************************************************************************/ 178/****************************************************************************/
179/* Assume b <= 8 */ 179/* Assume b <= 8 */
180unsigned get_bits(bitstr_t * bs, unsigned b) 180static unsigned int get_bits(bitstr_t *bs, unsigned int b)
181{ 181{
182 unsigned v, l; 182 unsigned int v, l;
183 183
184 v = (*bs->cur) & (0xffU >> bs->bit); 184 v = (*bs->cur) & (0xffU >> bs->bit);
185 l = b + bs->bit; 185 l = b + bs->bit;
@@ -203,9 +203,9 @@ unsigned get_bits(bitstr_t * bs, unsigned b)
203 203
204/****************************************************************************/ 204/****************************************************************************/
205/* Assume b <= 32 */ 205/* Assume b <= 32 */
206unsigned get_bitmap(bitstr_t * bs, unsigned b) 206static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
207{ 207{
208 unsigned v, l, shift, bytes; 208 unsigned int v, l, shift, bytes;
209 209
210 if (!b) 210 if (!b)
211 return 0; 211 return 0;
@@ -213,18 +213,18 @@ unsigned get_bitmap(bitstr_t * bs, unsigned b)
213 l = bs->bit + b; 213 l = bs->bit + b;
214 214
215 if (l < 8) { 215 if (l < 8) {
216 v = (unsigned) (*bs->cur) << (bs->bit + 24); 216 v = (unsigned int)(*bs->cur) << (bs->bit + 24);
217 bs->bit = l; 217 bs->bit = l;
218 } else if (l == 8) { 218 } else if (l == 8) {
219 v = (unsigned) (*bs->cur++) << (bs->bit + 24); 219 v = (unsigned int)(*bs->cur++) << (bs->bit + 24);
220 bs->bit = 0; 220 bs->bit = 0;
221 } else { 221 } else {
222 for (bytes = l >> 3, shift = 24, v = 0; bytes; 222 for (bytes = l >> 3, shift = 24, v = 0; bytes;
223 bytes--, shift -= 8) 223 bytes--, shift -= 8)
224 v |= (unsigned) (*bs->cur++) << shift; 224 v |= (unsigned int)(*bs->cur++) << shift;
225 225
226 if (l < 32) { 226 if (l < 32) {
227 v |= (unsigned) (*bs->cur) << shift; 227 v |= (unsigned int)(*bs->cur) << shift;
228 v <<= bs->bit; 228 v <<= bs->bit;
229 } else if (l > 32) { 229 } else if (l > 32) {
230 v <<= bs->bit; 230 v <<= bs->bit;
@@ -242,9 +242,9 @@ unsigned get_bitmap(bitstr_t * bs, unsigned b)
242/**************************************************************************** 242/****************************************************************************
243 * Assume bs is aligned and sizeof(unsigned int) == 4 243 * Assume bs is aligned and sizeof(unsigned int) == 4
244 ****************************************************************************/ 244 ****************************************************************************/
245unsigned get_uint(bitstr_t * bs, int b) 245static unsigned int get_uint(bitstr_t *bs, int b)
246{ 246{
247 unsigned v = 0; 247 unsigned int v = 0;
248 248
249 switch (b) { 249 switch (b) {
250 case 4: 250 case 4:
@@ -264,7 +264,8 @@ unsigned get_uint(bitstr_t * bs, int b)
264} 264}
265 265
266/****************************************************************************/ 266/****************************************************************************/
267int decode_nul(bitstr_t * bs, field_t * f, char *base, int level) 267static int decode_nul(bitstr_t *bs, const struct field_t *f,
268 char *base, int level)
268{ 269{
269 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 270 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
270 271
@@ -272,7 +273,8 @@ int decode_nul(bitstr_t * bs, field_t * f, char *base, int level)
272} 273}
273 274
274/****************************************************************************/ 275/****************************************************************************/
275int decode_bool(bitstr_t * bs, field_t * f, char *base, int level) 276static int decode_bool(bitstr_t *bs, const struct field_t *f,
277 char *base, int level)
276{ 278{
277 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
278 280
@@ -283,7 +285,8 @@ int decode_bool(bitstr_t * bs, field_t * f, char *base, int level)
283} 285}
284 286
285/****************************************************************************/ 287/****************************************************************************/
286int decode_oid(bitstr_t * bs, field_t * f, char *base, int level) 288static int decode_oid(bitstr_t *bs, const struct field_t *f,
289 char *base, int level)
287{ 290{
288 int len; 291 int len;
289 292
@@ -299,9 +302,10 @@ int decode_oid(bitstr_t * bs, field_t * f, char *base, int level)
299} 302}
300 303
301/****************************************************************************/ 304/****************************************************************************/
302int decode_int(bitstr_t * bs, field_t * f, char *base, int level) 305static int decode_int(bitstr_t *bs, const struct field_t *f,
306 char *base, int level)
303{ 307{
304 unsigned len; 308 unsigned int len;
305 309
306 PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); 310 PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
307 311
@@ -318,9 +322,9 @@ int decode_int(bitstr_t * bs, field_t * f, char *base, int level)
318 len = get_bits(bs, 2) + 1; 322 len = get_bits(bs, 2) + 1;
319 BYTE_ALIGN(bs); 323 BYTE_ALIGN(bs);
320 if (base && (f->attr & DECODE)) { /* timeToLive */ 324 if (base && (f->attr & DECODE)) { /* timeToLive */
321 unsigned v = get_uint(bs, len) + f->lb; 325 unsigned int v = get_uint(bs, len) + f->lb;
322 PRINT(" = %u", v); 326 PRINT(" = %u", v);
323 *((unsigned *) (base + f->offset)) = v; 327 *((unsigned int *)(base + f->offset)) = v;
324 } 328 }
325 bs->cur += len; 329 bs->cur += len;
326 break; 330 break;
@@ -342,7 +346,8 @@ int decode_int(bitstr_t * bs, field_t * f, char *base, int level)
342} 346}
343 347
344/****************************************************************************/ 348/****************************************************************************/
345int decode_enum(bitstr_t * bs, field_t * f, char *base, int level) 349static int decode_enum(bitstr_t *bs, const struct field_t *f,
350 char *base, int level)
346{ 351{
347 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 352 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
348 353
@@ -357,9 +362,10 @@ int decode_enum(bitstr_t * bs, field_t * f, char *base, int level)
357} 362}
358 363
359/****************************************************************************/ 364/****************************************************************************/
360int decode_bitstr(bitstr_t * bs, field_t * f, char *base, int level) 365static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
366 char *base, int level)
361{ 367{
362 unsigned len; 368 unsigned int len;
363 369
364 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 370 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
365 371
@@ -390,9 +396,10 @@ int decode_bitstr(bitstr_t * bs, field_t * f, char *base, int level)
390} 396}
391 397
392/****************************************************************************/ 398/****************************************************************************/
393int decode_numstr(bitstr_t * bs, field_t * f, char *base, int level) 399static int decode_numstr(bitstr_t *bs, const struct field_t *f,
400 char *base, int level)
394{ 401{
395 unsigned len; 402 unsigned int len;
396 403
397 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 404 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
398 405
@@ -407,9 +414,10 @@ int decode_numstr(bitstr_t * bs, field_t * f, char *base, int level)
407} 414}
408 415
409/****************************************************************************/ 416/****************************************************************************/
410int decode_octstr(bitstr_t * bs, field_t * f, char *base, int level) 417static int decode_octstr(bitstr_t *bs, const struct field_t *f,
418 char *base, int level)
411{ 419{
412 unsigned len; 420 unsigned int len;
413 421
414 PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); 422 PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
415 423
@@ -424,7 +432,7 @@ int decode_octstr(bitstr_t * bs, field_t * f, char *base, int level)
424 bs->cur[0], bs->cur[1], 432 bs->cur[0], bs->cur[1],
425 bs->cur[2], bs->cur[3], 433 bs->cur[2], bs->cur[3],
426 bs->cur[4] * 256 + bs->cur[5])); 434 bs->cur[4] * 256 + bs->cur[5]));
427 *((unsigned *) (base + f->offset)) = 435 *((unsigned int *)(base + f->offset)) =
428 bs->cur - bs->buf; 436 bs->cur - bs->buf;
429 } 437 }
430 } 438 }
@@ -455,9 +463,10 @@ int decode_octstr(bitstr_t * bs, field_t * f, char *base, int level)
455} 463}
456 464
457/****************************************************************************/ 465/****************************************************************************/
458int decode_bmpstr(bitstr_t * bs, field_t * f, char *base, int level) 466static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
467 char *base, int level)
459{ 468{
460 unsigned len; 469 unsigned int len;
461 470
462 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 471 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
463 472
@@ -480,11 +489,12 @@ int decode_bmpstr(bitstr_t * bs, field_t * f, char *base, int level)
480} 489}
481 490
482/****************************************************************************/ 491/****************************************************************************/
483int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) 492static int decode_seq(bitstr_t *bs, const struct field_t *f,
493 char *base, int level)
484{ 494{
485 unsigned ext, bmp, i, opt, len = 0, bmp2, bmp2_len; 495 unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
486 int err; 496 int err;
487 field_t *son; 497 const struct field_t *son;
488 unsigned char *beg = NULL; 498 unsigned char *beg = NULL;
489 499
490 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 500 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -498,7 +508,7 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level)
498 /* Get fields bitmap */ 508 /* Get fields bitmap */
499 bmp = get_bitmap(bs, f->sz); 509 bmp = get_bitmap(bs, f->sz);
500 if (base) 510 if (base)
501 *(unsigned *) base = bmp; 511 *(unsigned int *)base = bmp;
502 512
503 /* Decode the root components */ 513 /* Decode the root components */
504 for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) { 514 for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) {
@@ -550,7 +560,7 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level)
550 bmp2 = get_bitmap(bs, bmp2_len); 560 bmp2 = get_bitmap(bs, bmp2_len);
551 bmp |= bmp2 >> f->sz; 561 bmp |= bmp2 >> f->sz;
552 if (base) 562 if (base)
553 *(unsigned *) base = bmp; 563 *(unsigned int *)base = bmp;
554 BYTE_ALIGN(bs); 564 BYTE_ALIGN(bs);
555 565
556 /* Decode the extension components */ 566 /* Decode the extension components */
@@ -596,11 +606,12 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level)
596} 606}
597 607
598/****************************************************************************/ 608/****************************************************************************/
599int decode_seqof(bitstr_t * bs, field_t * f, char *base, int level) 609static int decode_seqof(bitstr_t *bs, const struct field_t *f,
610 char *base, int level)
600{ 611{
601 unsigned count, effective_count = 0, i, len = 0; 612 unsigned int count, effective_count = 0, i, len = 0;
602 int err; 613 int err;
603 field_t *son; 614 const struct field_t *son;
604 unsigned char *beg = NULL; 615 unsigned char *beg = NULL;
605 616
606 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 617 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -636,8 +647,8 @@ int decode_seqof(bitstr_t * bs, field_t * f, char *base, int level)
636 /* Write Count */ 647 /* Write Count */
637 if (base) { 648 if (base) {
638 effective_count = count > f->ub ? f->ub : count; 649 effective_count = count > f->ub ? f->ub : count;
639 *(unsigned *) base = effective_count; 650 *(unsigned int *)base = effective_count;
640 base += sizeof(unsigned); 651 base += sizeof(unsigned int);
641 } 652 }
642 653
643 /* Decode nested field */ 654 /* Decode nested field */
@@ -685,11 +696,12 @@ int decode_seqof(bitstr_t * bs, field_t * f, char *base, int level)
685 696
686 697
687/****************************************************************************/ 698/****************************************************************************/
688int decode_choice(bitstr_t * bs, field_t * f, char *base, int level) 699static int decode_choice(bitstr_t *bs, const struct field_t *f,
700 char *base, int level)
689{ 701{
690 unsigned type, ext, len = 0; 702 unsigned int type, ext, len = 0;
691 int err; 703 int err;
692 field_t *son; 704 const struct field_t *son;
693 unsigned char *beg = NULL; 705 unsigned char *beg = NULL;
694 706
695 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 707 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -710,7 +722,7 @@ int decode_choice(bitstr_t * bs, field_t * f, char *base, int level)
710 722
711 /* Write Type */ 723 /* Write Type */
712 if (base) 724 if (base)
713 *(unsigned *) base = type; 725 *(unsigned int *)base = type;
714 726
715 /* Check Range */ 727 /* Check Range */
716 if (type >= f->ub) { /* Newer version? */ 728 if (type >= f->ub) { /* Newer version? */
@@ -754,9 +766,9 @@ int decode_choice(bitstr_t * bs, field_t * f, char *base, int level)
754} 766}
755 767
756/****************************************************************************/ 768/****************************************************************************/
757int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage * ras) 769int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
758{ 770{
759 static field_t ras_message = { 771 static const struct field_t ras_message = {
760 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 772 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
761 0, _RasMessage 773 0, _RasMessage
762 }; 774 };
@@ -771,9 +783,9 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage * ras)
771 783
772/****************************************************************************/ 784/****************************************************************************/
773static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, 785static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
774 size_t sz, H323_UserInformation * uuie) 786 size_t sz, H323_UserInformation *uuie)
775{ 787{
776 static field_t h323_userinformation = { 788 static const struct field_t h323_userinformation = {
777 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 789 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
778 0, _H323_UserInformation 790 0, _H323_UserInformation
779 }; 791 };
@@ -792,7 +804,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
792 MultimediaSystemControlMessage * 804 MultimediaSystemControlMessage *
793 mscm) 805 mscm)
794{ 806{
795 static field_t multimediasystemcontrolmessage = { 807 static const struct field_t multimediasystemcontrolmessage = {
796 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, 808 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
797 DECODE | EXT, 0, _MultimediaSystemControlMessage 809 DECODE | EXT, 0, _MultimediaSystemControlMessage
798 }; 810 };
@@ -807,7 +819,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
807} 819}
808 820
809/****************************************************************************/ 821/****************************************************************************/
810int DecodeQ931(unsigned char *buf, size_t sz, Q931 * q931) 822int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
811{ 823{
812 unsigned char *p = buf; 824 unsigned char *p = buf;
813 int len; 825 int len;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f23fd9598e19..62137879e6aa 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -50,12 +50,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
50int (*set_h245_addr_hook) (struct sk_buff *skb, 50int (*set_h245_addr_hook) (struct sk_buff *skb,
51 unsigned char **data, int dataoff, 51 unsigned char **data, int dataoff,
52 H245_TransportAddress *taddr, 52 H245_TransportAddress *taddr,
53 union nf_conntrack_address *addr, __be16 port) 53 union nf_inet_addr *addr, __be16 port)
54 __read_mostly; 54 __read_mostly;
55int (*set_h225_addr_hook) (struct sk_buff *skb, 55int (*set_h225_addr_hook) (struct sk_buff *skb,
56 unsigned char **data, int dataoff, 56 unsigned char **data, int dataoff,
57 TransportAddress *taddr, 57 TransportAddress *taddr,
58 union nf_conntrack_address *addr, __be16 port) 58 union nf_inet_addr *addr, __be16 port)
59 __read_mostly; 59 __read_mostly;
60int (*set_sig_addr_hook) (struct sk_buff *skb, 60int (*set_sig_addr_hook) (struct sk_buff *skb,
61 struct nf_conn *ct, 61 struct nf_conn *ct,
@@ -114,7 +114,8 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
114{ 114{
115 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 115 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
116 int dir = CTINFO2DIR(ctinfo); 116 int dir = CTINFO2DIR(ctinfo);
117 struct tcphdr _tcph, *th; 117 const struct tcphdr *th;
118 struct tcphdr _tcph;
118 int tcpdatalen; 119 int tcpdatalen;
119 int tcpdataoff; 120 int tcpdataoff;
120 unsigned char *tpkt; 121 unsigned char *tpkt;
@@ -212,11 +213,11 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
212} 213}
213 214
214/****************************************************************************/ 215/****************************************************************************/
215static int get_h245_addr(struct nf_conn *ct, unsigned char *data, 216static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
216 H245_TransportAddress *taddr, 217 H245_TransportAddress *taddr,
217 union nf_conntrack_address *addr, __be16 *port) 218 union nf_inet_addr *addr, __be16 *port)
218{ 219{
219 unsigned char *p; 220 const unsigned char *p;
220 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 221 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
221 int len; 222 int len;
222 223
@@ -257,7 +258,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
257 int ret = 0; 258 int ret = 0;
258 __be16 port; 259 __be16 port;
259 __be16 rtp_port, rtcp_port; 260 __be16 rtp_port, rtcp_port;
260 union nf_conntrack_address addr; 261 union nf_inet_addr addr;
261 struct nf_conntrack_expect *rtp_exp; 262 struct nf_conntrack_expect *rtp_exp;
262 struct nf_conntrack_expect *rtcp_exp; 263 struct nf_conntrack_expect *rtcp_exp;
263 typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp; 264 typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
@@ -330,7 +331,7 @@ static int expect_t120(struct sk_buff *skb,
330 int dir = CTINFO2DIR(ctinfo); 331 int dir = CTINFO2DIR(ctinfo);
331 int ret = 0; 332 int ret = 0;
332 __be16 port; 333 __be16 port;
333 union nf_conntrack_address addr; 334 union nf_inet_addr addr;
334 struct nf_conntrack_expect *exp; 335 struct nf_conntrack_expect *exp;
335 typeof(nat_t120_hook) nat_t120; 336 typeof(nat_t120_hook) nat_t120;
336 337
@@ -623,9 +624,9 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
623/****************************************************************************/ 624/****************************************************************************/
624int get_h225_addr(struct nf_conn *ct, unsigned char *data, 625int get_h225_addr(struct nf_conn *ct, unsigned char *data,
625 TransportAddress *taddr, 626 TransportAddress *taddr,
626 union nf_conntrack_address *addr, __be16 *port) 627 union nf_inet_addr *addr, __be16 *port)
627{ 628{
628 unsigned char *p; 629 const unsigned char *p;
629 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 630 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
630 int len; 631 int len;
631 632
@@ -662,7 +663,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
662 int dir = CTINFO2DIR(ctinfo); 663 int dir = CTINFO2DIR(ctinfo);
663 int ret = 0; 664 int ret = 0;
664 __be16 port; 665 __be16 port;
665 union nf_conntrack_address addr; 666 union nf_inet_addr addr;
666 struct nf_conntrack_expect *exp; 667 struct nf_conntrack_expect *exp;
667 typeof(nat_h245_hook) nat_h245; 668 typeof(nat_h245_hook) nat_h245;
668 669
@@ -704,13 +705,18 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
704 705
705/* If the calling party is on the same side of the forward-to party, 706/* If the calling party is on the same side of the forward-to party,
706 * we don't need to track the second call */ 707 * we don't need to track the second call */
707static int callforward_do_filter(union nf_conntrack_address *src, 708static int callforward_do_filter(const union nf_inet_addr *src,
708 union nf_conntrack_address *dst, 709 const union nf_inet_addr *dst, int family)
709 int family)
710{ 710{
711 const struct nf_afinfo *afinfo;
711 struct flowi fl1, fl2; 712 struct flowi fl1, fl2;
712 int ret = 0; 713 int ret = 0;
713 714
715 /* rcu_read_lock()ed by nf_hook_slow() */
716 afinfo = nf_get_afinfo(family);
717 if (!afinfo)
718 return 0;
719
714 memset(&fl1, 0, sizeof(fl1)); 720 memset(&fl1, 0, sizeof(fl1));
715 memset(&fl2, 0, sizeof(fl2)); 721 memset(&fl2, 0, sizeof(fl2));
716 722
@@ -720,8 +726,8 @@ static int callforward_do_filter(union nf_conntrack_address *src,
720 726
721 fl1.fl4_dst = src->ip; 727 fl1.fl4_dst = src->ip;
722 fl2.fl4_dst = dst->ip; 728 fl2.fl4_dst = dst->ip;
723 if (ip_route_output_key(&rt1, &fl1) == 0) { 729 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
724 if (ip_route_output_key(&rt2, &fl2) == 0) { 730 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
725 if (rt1->rt_gateway == rt2->rt_gateway && 731 if (rt1->rt_gateway == rt2->rt_gateway &&
726 rt1->u.dst.dev == rt2->u.dst.dev) 732 rt1->u.dst.dev == rt2->u.dst.dev)
727 ret = 1; 733 ret = 1;
@@ -731,16 +737,15 @@ static int callforward_do_filter(union nf_conntrack_address *src,
731 } 737 }
732 break; 738 break;
733 } 739 }
734#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 740#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
741 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
735 case AF_INET6: { 742 case AF_INET6: {
736 struct rt6_info *rt1, *rt2; 743 struct rt6_info *rt1, *rt2;
737 744
738 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); 745 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst));
739 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); 746 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst));
740 rt1 = (struct rt6_info *)ip6_route_output(NULL, &fl1); 747 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
741 if (rt1) { 748 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
742 rt2 = (struct rt6_info *)ip6_route_output(NULL, &fl2);
743 if (rt2) {
744 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 749 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
745 sizeof(rt1->rt6i_gateway)) && 750 sizeof(rt1->rt6i_gateway)) &&
746 rt1->u.dst.dev == rt2->u.dst.dev) 751 rt1->u.dst.dev == rt2->u.dst.dev)
@@ -767,7 +772,7 @@ static int expect_callforwarding(struct sk_buff *skb,
767 int dir = CTINFO2DIR(ctinfo); 772 int dir = CTINFO2DIR(ctinfo);
768 int ret = 0; 773 int ret = 0;
769 __be16 port; 774 __be16 port;
770 union nf_conntrack_address addr; 775 union nf_inet_addr addr;
771 struct nf_conntrack_expect *exp; 776 struct nf_conntrack_expect *exp;
772 typeof(nat_callforwarding_hook) nat_callforwarding; 777 typeof(nat_callforwarding_hook) nat_callforwarding;
773 778
@@ -823,7 +828,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
823 int ret; 828 int ret;
824 int i; 829 int i;
825 __be16 port; 830 __be16 port;
826 union nf_conntrack_address addr; 831 union nf_inet_addr addr;
827 typeof(set_h225_addr_hook) set_h225_addr; 832 typeof(set_h225_addr_hook) set_h225_addr;
828 833
829 pr_debug("nf_ct_q931: Setup\n"); 834 pr_debug("nf_ct_q931: Setup\n");
@@ -1180,7 +1185,8 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
1180static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, 1185static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
1181 int *datalen) 1186 int *datalen)
1182{ 1187{
1183 struct udphdr _uh, *uh; 1188 const struct udphdr *uh;
1189 struct udphdr _uh;
1184 int dataoff; 1190 int dataoff;
1185 1191
1186 uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh); 1192 uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
@@ -1195,7 +1201,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
1195 1201
1196/****************************************************************************/ 1202/****************************************************************************/
1197static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, 1203static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
1198 union nf_conntrack_address *addr, 1204 union nf_inet_addr *addr,
1199 __be16 port) 1205 __be16 port)
1200{ 1206{
1201 struct nf_conntrack_expect *exp; 1207 struct nf_conntrack_expect *exp;
@@ -1237,7 +1243,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
1237 int ret = 0; 1243 int ret = 0;
1238 int i; 1244 int i;
1239 __be16 port; 1245 __be16 port;
1240 union nf_conntrack_address addr; 1246 union nf_inet_addr addr;
1241 struct nf_conntrack_expect *exp; 1247 struct nf_conntrack_expect *exp;
1242 typeof(nat_q931_hook) nat_q931; 1248 typeof(nat_q931_hook) nat_q931;
1243 1249
@@ -1306,7 +1312,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
1306 int dir = CTINFO2DIR(ctinfo); 1312 int dir = CTINFO2DIR(ctinfo);
1307 int ret = 0; 1313 int ret = 0;
1308 __be16 port; 1314 __be16 port;
1309 union nf_conntrack_address addr; 1315 union nf_inet_addr addr;
1310 struct nf_conntrack_expect *exp; 1316 struct nf_conntrack_expect *exp;
1311 1317
1312 pr_debug("nf_ct_ras: GCF\n"); 1318 pr_debug("nf_ct_ras: GCF\n");
@@ -1410,7 +1416,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
1410 nf_ct_refresh(ct, skb, info->timeout * HZ); 1416 nf_ct_refresh(ct, skb, info->timeout * HZ);
1411 1417
1412 /* Set expect timeout */ 1418 /* Set expect timeout */
1413 read_lock_bh(&nf_conntrack_lock); 1419 spin_lock_bh(&nf_conntrack_lock);
1414 exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, 1420 exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
1415 info->sig_port[!dir]); 1421 info->sig_port[!dir]);
1416 if (exp) { 1422 if (exp) {
@@ -1420,7 +1426,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
1420 NF_CT_DUMP_TUPLE(&exp->tuple); 1426 NF_CT_DUMP_TUPLE(&exp->tuple);
1421 set_expect_timeout(exp, info->timeout); 1427 set_expect_timeout(exp, info->timeout);
1422 } 1428 }
1423 read_unlock_bh(&nf_conntrack_lock); 1429 spin_unlock_bh(&nf_conntrack_lock);
1424 } 1430 }
1425 1431
1426 return 0; 1432 return 0;
@@ -1463,10 +1469,10 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
1463 enum ip_conntrack_info ctinfo, 1469 enum ip_conntrack_info ctinfo,
1464 unsigned char **data, AdmissionRequest *arq) 1470 unsigned char **data, AdmissionRequest *arq)
1465{ 1471{
1466 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 1472 const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
1467 int dir = CTINFO2DIR(ctinfo); 1473 int dir = CTINFO2DIR(ctinfo);
1468 __be16 port; 1474 __be16 port;
1469 union nf_conntrack_address addr; 1475 union nf_inet_addr addr;
1470 typeof(set_h225_addr_hook) set_h225_addr; 1476 typeof(set_h225_addr_hook) set_h225_addr;
1471 1477
1472 pr_debug("nf_ct_ras: ARQ\n"); 1478 pr_debug("nf_ct_ras: ARQ\n");
@@ -1508,7 +1514,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
1508 int dir = CTINFO2DIR(ctinfo); 1514 int dir = CTINFO2DIR(ctinfo);
1509 int ret = 0; 1515 int ret = 0;
1510 __be16 port; 1516 __be16 port;
1511 union nf_conntrack_address addr; 1517 union nf_inet_addr addr;
1512 struct nf_conntrack_expect *exp; 1518 struct nf_conntrack_expect *exp;
1513 typeof(set_sig_addr_hook) set_sig_addr; 1519 typeof(set_sig_addr_hook) set_sig_addr;
1514 1520
@@ -1571,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
1571 int dir = CTINFO2DIR(ctinfo); 1577 int dir = CTINFO2DIR(ctinfo);
1572 int ret = 0; 1578 int ret = 0;
1573 __be16 port; 1579 __be16 port;
1574 union nf_conntrack_address addr; 1580 union nf_inet_addr addr;
1575 struct nf_conntrack_expect *exp; 1581 struct nf_conntrack_expect *exp;
1576 1582
1577 pr_debug("nf_ct_ras: LCF\n"); 1583 pr_debug("nf_ct_ras: LCF\n");
diff --git a/net/netfilter/nf_conntrack_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c
index 3a21fdf1a265..d880f3523c1d 100644
--- a/net/netfilter/nf_conntrack_h323_types.c
+++ b/net/netfilter/nf_conntrack_h323_types.c
@@ -5,22 +5,22 @@
5 * This source code is licensed under General Public License version 2. 5 * This source code is licensed under General Public License version 2.
6 */ 6 */
7 7
8static field_t _TransportAddress_ipAddress[] = { /* SEQUENCE */ 8static const struct field_t _TransportAddress_ipAddress[] = { /* SEQUENCE */
9 {FNAME("ip") OCTSTR, FIXD, 4, 0, DECODE, 9 {FNAME("ip") OCTSTR, FIXD, 4, 0, DECODE,
10 offsetof(TransportAddress_ipAddress, ip), NULL}, 10 offsetof(TransportAddress_ipAddress, ip), NULL},
11 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL}, 11 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
12}; 12};
13 13
14static field_t _TransportAddress_ipSourceRoute_route[] = { /* SEQUENCE OF */ 14static const struct field_t _TransportAddress_ipSourceRoute_route[] = { /* SEQUENCE OF */
15 {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 15 {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
16}; 16};
17 17
18static field_t _TransportAddress_ipSourceRoute_routing[] = { /* CHOICE */ 18static const struct field_t _TransportAddress_ipSourceRoute_routing[] = { /* CHOICE */
19 {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 19 {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
20 {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 20 {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
21}; 21};
22 22
23static field_t _TransportAddress_ipSourceRoute[] = { /* SEQUENCE */ 23static const struct field_t _TransportAddress_ipSourceRoute[] = { /* SEQUENCE */
24 {FNAME("ip") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 24 {FNAME("ip") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
25 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL}, 25 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
26 {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0, 26 {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0,
@@ -29,37 +29,37 @@ static field_t _TransportAddress_ipSourceRoute[] = { /* SEQUENCE */
29 _TransportAddress_ipSourceRoute_routing}, 29 _TransportAddress_ipSourceRoute_routing},
30}; 30};
31 31
32static field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */ 32static const struct field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */
33 {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL}, 33 {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
34 {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 34 {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
35 {FNAME("port") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL}, 35 {FNAME("port") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
36}; 36};
37 37
38static field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */ 38static const struct field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */
39 {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE, 39 {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE,
40 offsetof(TransportAddress_ip6Address, ip), NULL}, 40 offsetof(TransportAddress_ip6Address, ip), NULL},
41 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL}, 41 {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
42}; 42};
43 43
44static field_t _H221NonStandard[] = { /* SEQUENCE */ 44static const struct field_t _H221NonStandard[] = { /* SEQUENCE */
45 {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL}, 45 {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
46 {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL}, 46 {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
47 {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL}, 47 {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
48}; 48};
49 49
50static field_t _NonStandardIdentifier[] = { /* CHOICE */ 50static const struct field_t _NonStandardIdentifier[] = { /* CHOICE */
51 {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL}, 51 {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
52 {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP | EXT, 0, 52 {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP | EXT, 0,
53 _H221NonStandard}, 53 _H221NonStandard},
54}; 54};
55 55
56static field_t _NonStandardParameter[] = { /* SEQUENCE */ 56static const struct field_t _NonStandardParameter[] = { /* SEQUENCE */
57 {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP | EXT, 0, 57 {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP | EXT, 0,
58 _NonStandardIdentifier}, 58 _NonStandardIdentifier},
59 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 59 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
60}; 60};
61 61
62static field_t _TransportAddress[] = { /* CHOICE */ 62static const struct field_t _TransportAddress[] = { /* CHOICE */
63 {FNAME("ipAddress") SEQ, 0, 2, 2, DECODE, 63 {FNAME("ipAddress") SEQ, 0, 2, 2, DECODE,
64 offsetof(TransportAddress, ipAddress), _TransportAddress_ipAddress}, 64 offsetof(TransportAddress, ipAddress), _TransportAddress_ipAddress},
65 {FNAME("ipSourceRoute") SEQ, 0, 4, 4, SKIP | EXT, 0, 65 {FNAME("ipSourceRoute") SEQ, 0, 4, 4, SKIP | EXT, 0,
@@ -75,7 +75,7 @@ static field_t _TransportAddress[] = { /* CHOICE */
75 _NonStandardParameter}, 75 _NonStandardParameter},
76}; 76};
77 77
78static field_t _AliasAddress[] = { /* CHOICE */ 78static const struct field_t _AliasAddress[] = { /* CHOICE */
79 {FNAME("dialedDigits") NUMDGT, 7, 1, 0, SKIP, 0, NULL}, 79 {FNAME("dialedDigits") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
80 {FNAME("h323-ID") BMPSTR, BYTE, 1, 0, SKIP, 0, NULL}, 80 {FNAME("h323-ID") BMPSTR, BYTE, 1, 0, SKIP, 0, NULL},
81 {FNAME("url-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL}, 81 {FNAME("url-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL},
@@ -85,78 +85,78 @@ static field_t _AliasAddress[] = { /* CHOICE */
85 {FNAME("mobileUIM") CHOICE, 1, 2, 2, SKIP | EXT, 0, NULL}, 85 {FNAME("mobileUIM") CHOICE, 1, 2, 2, SKIP | EXT, 0, NULL},
86}; 86};
87 87
88static field_t _Setup_UUIE_sourceAddress[] = { /* SEQUENCE OF */ 88static const struct field_t _Setup_UUIE_sourceAddress[] = { /* SEQUENCE OF */
89 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 89 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
90}; 90};
91 91
92static field_t _VendorIdentifier[] = { /* SEQUENCE */ 92static const struct field_t _VendorIdentifier[] = { /* SEQUENCE */
93 {FNAME("vendor") SEQ, 0, 3, 3, SKIP | EXT, 0, _H221NonStandard}, 93 {FNAME("vendor") SEQ, 0, 3, 3, SKIP | EXT, 0, _H221NonStandard},
94 {FNAME("productId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL}, 94 {FNAME("productId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
95 {FNAME("versionId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL}, 95 {FNAME("versionId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
96}; 96};
97 97
98static field_t _GatekeeperInfo[] = { /* SEQUENCE */ 98static const struct field_t _GatekeeperInfo[] = { /* SEQUENCE */
99 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 99 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
100 _NonStandardParameter}, 100 _NonStandardParameter},
101}; 101};
102 102
103static field_t _H310Caps[] = { /* SEQUENCE */ 103static const struct field_t _H310Caps[] = { /* SEQUENCE */
104 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 104 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
105 _NonStandardParameter}, 105 _NonStandardParameter},
106 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 106 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
107 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 107 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
108}; 108};
109 109
110static field_t _H320Caps[] = { /* SEQUENCE */ 110static const struct field_t _H320Caps[] = { /* SEQUENCE */
111 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 111 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
112 _NonStandardParameter}, 112 _NonStandardParameter},
113 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 113 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
114 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 114 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
115}; 115};
116 116
117static field_t _H321Caps[] = { /* SEQUENCE */ 117static const struct field_t _H321Caps[] = { /* SEQUENCE */
118 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 118 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
119 _NonStandardParameter}, 119 _NonStandardParameter},
120 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 120 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
121 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 121 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
122}; 122};
123 123
124static field_t _H322Caps[] = { /* SEQUENCE */ 124static const struct field_t _H322Caps[] = { /* SEQUENCE */
125 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 125 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
126 _NonStandardParameter}, 126 _NonStandardParameter},
127 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 127 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
128 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 128 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
129}; 129};
130 130
131static field_t _H323Caps[] = { /* SEQUENCE */ 131static const struct field_t _H323Caps[] = { /* SEQUENCE */
132 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 132 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
133 _NonStandardParameter}, 133 _NonStandardParameter},
134 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 134 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
135 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 135 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
136}; 136};
137 137
138static field_t _H324Caps[] = { /* SEQUENCE */ 138static const struct field_t _H324Caps[] = { /* SEQUENCE */
139 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 139 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
140 _NonStandardParameter}, 140 _NonStandardParameter},
141 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 141 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
142 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 142 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
143}; 143};
144 144
145static field_t _VoiceCaps[] = { /* SEQUENCE */ 145static const struct field_t _VoiceCaps[] = { /* SEQUENCE */
146 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 146 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
147 _NonStandardParameter}, 147 _NonStandardParameter},
148 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 148 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
149 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 149 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
150}; 150};
151 151
152static field_t _T120OnlyCaps[] = { /* SEQUENCE */ 152static const struct field_t _T120OnlyCaps[] = { /* SEQUENCE */
153 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 153 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
154 _NonStandardParameter}, 154 _NonStandardParameter},
155 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 155 {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
156 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL}, 156 {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
157}; 157};
158 158
159static field_t _SupportedProtocols[] = { /* CHOICE */ 159static const struct field_t _SupportedProtocols[] = { /* CHOICE */
160 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP, 0, 160 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP, 0,
161 _NonStandardParameter}, 161 _NonStandardParameter},
162 {FNAME("h310") SEQ, 1, 1, 3, SKIP | EXT, 0, _H310Caps}, 162 {FNAME("h310") SEQ, 1, 1, 3, SKIP | EXT, 0, _H310Caps},
@@ -171,29 +171,29 @@ static field_t _SupportedProtocols[] = { /* CHOICE */
171 {FNAME("t38FaxAnnexbOnly") SEQ, 2, 5, 5, SKIP | EXT, 0, NULL}, 171 {FNAME("t38FaxAnnexbOnly") SEQ, 2, 5, 5, SKIP | EXT, 0, NULL},
172}; 172};
173 173
174static field_t _GatewayInfo_protocol[] = { /* SEQUENCE OF */ 174static const struct field_t _GatewayInfo_protocol[] = { /* SEQUENCE OF */
175 {FNAME("item") CHOICE, 4, 9, 11, SKIP | EXT, 0, _SupportedProtocols}, 175 {FNAME("item") CHOICE, 4, 9, 11, SKIP | EXT, 0, _SupportedProtocols},
176}; 176};
177 177
178static field_t _GatewayInfo[] = { /* SEQUENCE */ 178static const struct field_t _GatewayInfo[] = { /* SEQUENCE */
179 {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, 179 {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
180 _GatewayInfo_protocol}, 180 _GatewayInfo_protocol},
181 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 181 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
182 _NonStandardParameter}, 182 _NonStandardParameter},
183}; 183};
184 184
185static field_t _McuInfo[] = { /* SEQUENCE */ 185static const struct field_t _McuInfo[] = { /* SEQUENCE */
186 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 186 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
187 _NonStandardParameter}, 187 _NonStandardParameter},
188 {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 188 {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
189}; 189};
190 190
191static field_t _TerminalInfo[] = { /* SEQUENCE */ 191static const struct field_t _TerminalInfo[] = { /* SEQUENCE */
192 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 192 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
193 _NonStandardParameter}, 193 _NonStandardParameter},
194}; 194};
195 195
196static field_t _EndpointType[] = { /* SEQUENCE */ 196static const struct field_t _EndpointType[] = { /* SEQUENCE */
197 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 197 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
198 _NonStandardParameter}, 198 _NonStandardParameter},
199 {FNAME("vendor") SEQ, 2, 3, 3, SKIP | EXT | OPT, 0, 199 {FNAME("vendor") SEQ, 2, 3, 3, SKIP | EXT | OPT, 0,
@@ -210,19 +210,19 @@ static field_t _EndpointType[] = { /* SEQUENCE */
210 0, NULL}, 210 0, NULL},
211}; 211};
212 212
213static field_t _Setup_UUIE_destinationAddress[] = { /* SEQUENCE OF */ 213static const struct field_t _Setup_UUIE_destinationAddress[] = { /* SEQUENCE OF */
214 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 214 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
215}; 215};
216 216
217static field_t _Setup_UUIE_destExtraCallInfo[] = { /* SEQUENCE OF */ 217static const struct field_t _Setup_UUIE_destExtraCallInfo[] = { /* SEQUENCE OF */
218 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 218 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
219}; 219};
220 220
221static field_t _Setup_UUIE_destExtraCRV[] = { /* SEQUENCE OF */ 221static const struct field_t _Setup_UUIE_destExtraCRV[] = { /* SEQUENCE OF */
222 {FNAME("item") INT, WORD, 0, 0, SKIP, 0, NULL}, 222 {FNAME("item") INT, WORD, 0, 0, SKIP, 0, NULL},
223}; 223};
224 224
225static field_t _Setup_UUIE_conferenceGoal[] = { /* CHOICE */ 225static const struct field_t _Setup_UUIE_conferenceGoal[] = { /* CHOICE */
226 {FNAME("create") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 226 {FNAME("create") NUL, FIXD, 0, 0, SKIP, 0, NULL},
227 {FNAME("join") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 227 {FNAME("join") NUL, FIXD, 0, 0, SKIP, 0, NULL},
228 {FNAME("invite") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 228 {FNAME("invite") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -231,12 +231,12 @@ static field_t _Setup_UUIE_conferenceGoal[] = { /* CHOICE */
231 0, NULL}, 231 0, NULL},
232}; 232};
233 233
234static field_t _Q954Details[] = { /* SEQUENCE */ 234static const struct field_t _Q954Details[] = { /* SEQUENCE */
235 {FNAME("conferenceCalling") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 235 {FNAME("conferenceCalling") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
236 {FNAME("threePartyService") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 236 {FNAME("threePartyService") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
237}; 237};
238 238
239static field_t _QseriesOptions[] = { /* SEQUENCE */ 239static const struct field_t _QseriesOptions[] = { /* SEQUENCE */
240 {FNAME("q932Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 240 {FNAME("q932Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
241 {FNAME("q951Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 241 {FNAME("q951Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
242 {FNAME("q952Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 242 {FNAME("q952Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -247,32 +247,32 @@ static field_t _QseriesOptions[] = { /* SEQUENCE */
247 {FNAME("q954Info") SEQ, 0, 2, 2, SKIP | EXT, 0, _Q954Details}, 247 {FNAME("q954Info") SEQ, 0, 2, 2, SKIP | EXT, 0, _Q954Details},
248}; 248};
249 249
250static field_t _CallType[] = { /* CHOICE */ 250static const struct field_t _CallType[] = { /* CHOICE */
251 {FNAME("pointToPoint") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 251 {FNAME("pointToPoint") NUL, FIXD, 0, 0, SKIP, 0, NULL},
252 {FNAME("oneToN") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 252 {FNAME("oneToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
253 {FNAME("nToOne") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 253 {FNAME("nToOne") NUL, FIXD, 0, 0, SKIP, 0, NULL},
254 {FNAME("nToN") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 254 {FNAME("nToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
255}; 255};
256 256
257static field_t _H245_NonStandardIdentifier_h221NonStandard[] = { /* SEQUENCE */ 257static const struct field_t _H245_NonStandardIdentifier_h221NonStandard[] = { /* SEQUENCE */
258 {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL}, 258 {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
259 {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL}, 259 {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
260 {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL}, 260 {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
261}; 261};
262 262
263static field_t _H245_NonStandardIdentifier[] = { /* CHOICE */ 263static const struct field_t _H245_NonStandardIdentifier[] = { /* CHOICE */
264 {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL}, 264 {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
265 {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP, 0, 265 {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP, 0,
266 _H245_NonStandardIdentifier_h221NonStandard}, 266 _H245_NonStandardIdentifier_h221NonStandard},
267}; 267};
268 268
269static field_t _H245_NonStandardParameter[] = { /* SEQUENCE */ 269static const struct field_t _H245_NonStandardParameter[] = { /* SEQUENCE */
270 {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP, 0, 270 {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP, 0,
271 _H245_NonStandardIdentifier}, 271 _H245_NonStandardIdentifier},
272 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 272 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
273}; 273};
274 274
275static field_t _H261VideoCapability[] = { /* SEQUENCE */ 275static const struct field_t _H261VideoCapability[] = { /* SEQUENCE */
276 {FNAME("qcifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL}, 276 {FNAME("qcifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
277 {FNAME("cifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL}, 277 {FNAME("cifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
278 {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0, 278 {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0,
@@ -282,7 +282,7 @@ static field_t _H261VideoCapability[] = { /* SEQUENCE */
282 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 282 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
283}; 283};
284 284
285static field_t _H262VideoCapability[] = { /* SEQUENCE */ 285static const struct field_t _H262VideoCapability[] = { /* SEQUENCE */
286 {FNAME("profileAndLevel-SPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 286 {FNAME("profileAndLevel-SPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
287 {FNAME("profileAndLevel-MPatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 287 {FNAME("profileAndLevel-MPatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
288 {FNAME("profileAndLevel-MPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 288 {FNAME("profileAndLevel-MPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -304,7 +304,7 @@ static field_t _H262VideoCapability[] = { /* SEQUENCE */
304 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 304 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
305}; 305};
306 306
307static field_t _H263VideoCapability[] = { /* SEQUENCE */ 307static const struct field_t _H263VideoCapability[] = { /* SEQUENCE */
308 {FNAME("sqcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL}, 308 {FNAME("sqcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
309 {FNAME("qcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL}, 309 {FNAME("qcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
310 {FNAME("cifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL}, 310 {FNAME("cifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
@@ -330,7 +330,7 @@ static field_t _H263VideoCapability[] = { /* SEQUENCE */
330 {FNAME("h263Options") SEQ, 5, 29, 31, SKIP | EXT | OPT, 0, NULL}, 330 {FNAME("h263Options") SEQ, 5, 29, 31, SKIP | EXT | OPT, 0, NULL},
331}; 331};
332 332
333static field_t _IS11172VideoCapability[] = { /* SEQUENCE */ 333static const struct field_t _IS11172VideoCapability[] = { /* SEQUENCE */
334 {FNAME("constrainedBitstream") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 334 {FNAME("constrainedBitstream") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
335 {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL}, 335 {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
336 {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL}, 336 {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
@@ -341,7 +341,7 @@ static field_t _IS11172VideoCapability[] = { /* SEQUENCE */
341 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 341 {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
342}; 342};
343 343
344static field_t _VideoCapability[] = { /* CHOICE */ 344static const struct field_t _VideoCapability[] = { /* CHOICE */
345 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 345 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
346 _H245_NonStandardParameter}, 346 _H245_NonStandardParameter},
347 {FNAME("h261VideoCapability") SEQ, 2, 5, 6, SKIP | EXT, 0, 347 {FNAME("h261VideoCapability") SEQ, 2, 5, 6, SKIP | EXT, 0,
@@ -355,12 +355,12 @@ static field_t _VideoCapability[] = { /* CHOICE */
355 {FNAME("genericVideoCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL}, 355 {FNAME("genericVideoCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
356}; 356};
357 357
358static field_t _AudioCapability_g7231[] = { /* SEQUENCE */ 358static const struct field_t _AudioCapability_g7231[] = { /* SEQUENCE */
359 {FNAME("maxAl-sduAudioFrames") INT, BYTE, 1, 0, SKIP, 0, NULL}, 359 {FNAME("maxAl-sduAudioFrames") INT, BYTE, 1, 0, SKIP, 0, NULL},
360 {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 360 {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
361}; 361};
362 362
363static field_t _IS11172AudioCapability[] = { /* SEQUENCE */ 363static const struct field_t _IS11172AudioCapability[] = { /* SEQUENCE */
364 {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 364 {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
365 {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 365 {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
366 {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 366 {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -372,7 +372,7 @@ static field_t _IS11172AudioCapability[] = { /* SEQUENCE */
372 {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL}, 372 {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
373}; 373};
374 374
375static field_t _IS13818AudioCapability[] = { /* SEQUENCE */ 375static const struct field_t _IS13818AudioCapability[] = { /* SEQUENCE */
376 {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 376 {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
377 {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 377 {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
378 {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 378 {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -396,7 +396,7 @@ static field_t _IS13818AudioCapability[] = { /* SEQUENCE */
396 {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL}, 396 {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
397}; 397};
398 398
399static field_t _AudioCapability[] = { /* CHOICE */ 399static const struct field_t _AudioCapability[] = { /* CHOICE */
400 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 400 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
401 _H245_NonStandardParameter}, 401 _H245_NonStandardParameter},
402 {FNAME("g711Alaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL}, 402 {FNAME("g711Alaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
@@ -424,7 +424,7 @@ static field_t _AudioCapability[] = { /* CHOICE */
424 {FNAME("g729Extensions") SEQ, 1, 8, 8, SKIP | EXT, 0, NULL}, 424 {FNAME("g729Extensions") SEQ, 1, 8, 8, SKIP | EXT, 0, NULL},
425}; 425};
426 426
427static field_t _DataProtocolCapability[] = { /* CHOICE */ 427static const struct field_t _DataProtocolCapability[] = { /* CHOICE */
428 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 428 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
429 _H245_NonStandardParameter}, 429 _H245_NonStandardParameter},
430 {FNAME("v14buffered") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 430 {FNAME("v14buffered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -442,7 +442,7 @@ static field_t _DataProtocolCapability[] = { /* CHOICE */
442 {FNAME("udp") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 442 {FNAME("udp") NUL, FIXD, 0, 0, SKIP, 0, NULL},
443}; 443};
444 444
445static field_t _T84Profile_t84Restricted[] = { /* SEQUENCE */ 445static const struct field_t _T84Profile_t84Restricted[] = { /* SEQUENCE */
446 {FNAME("qcif") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 446 {FNAME("qcif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
447 {FNAME("cif") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 447 {FNAME("cif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
448 {FNAME("ccir601Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 448 {FNAME("ccir601Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -464,25 +464,25 @@ static field_t _T84Profile_t84Restricted[] = { /* SEQUENCE */
464 {FNAME("digPhotoHighProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 464 {FNAME("digPhotoHighProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
465}; 465};
466 466
467static field_t _T84Profile[] = { /* CHOICE */ 467static const struct field_t _T84Profile[] = { /* CHOICE */
468 {FNAME("t84Unrestricted") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 468 {FNAME("t84Unrestricted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
469 {FNAME("t84Restricted") SEQ, 0, 19, 19, SKIP | EXT, 0, 469 {FNAME("t84Restricted") SEQ, 0, 19, 19, SKIP | EXT, 0,
470 _T84Profile_t84Restricted}, 470 _T84Profile_t84Restricted},
471}; 471};
472 472
473static field_t _DataApplicationCapability_application_t84[] = { /* SEQUENCE */ 473static const struct field_t _DataApplicationCapability_application_t84[] = { /* SEQUENCE */
474 {FNAME("t84Protocol") CHOICE, 3, 7, 14, SKIP | EXT, 0, 474 {FNAME("t84Protocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
475 _DataProtocolCapability}, 475 _DataProtocolCapability},
476 {FNAME("t84Profile") CHOICE, 1, 2, 2, SKIP, 0, _T84Profile}, 476 {FNAME("t84Profile") CHOICE, 1, 2, 2, SKIP, 0, _T84Profile},
477}; 477};
478 478
479static field_t _DataApplicationCapability_application_nlpid[] = { /* SEQUENCE */ 479static const struct field_t _DataApplicationCapability_application_nlpid[] = { /* SEQUENCE */
480 {FNAME("nlpidProtocol") CHOICE, 3, 7, 14, SKIP | EXT, 0, 480 {FNAME("nlpidProtocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
481 _DataProtocolCapability}, 481 _DataProtocolCapability},
482 {FNAME("nlpidData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 482 {FNAME("nlpidData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
483}; 483};
484 484
485static field_t _DataApplicationCapability_application[] = { /* CHOICE */ 485static const struct field_t _DataApplicationCapability_application[] = { /* CHOICE */
486 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 486 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
487 _H245_NonStandardParameter}, 487 _H245_NonStandardParameter},
488 {FNAME("t120") CHOICE, 3, 7, 14, DECODE | EXT, 488 {FNAME("t120") CHOICE, 3, 7, 14, DECODE | EXT,
@@ -509,20 +509,20 @@ static field_t _DataApplicationCapability_application[] = { /* CHOICE */
509 {FNAME("genericDataCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL}, 509 {FNAME("genericDataCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
510}; 510};
511 511
512static field_t _DataApplicationCapability[] = { /* SEQUENCE */ 512static const struct field_t _DataApplicationCapability[] = { /* SEQUENCE */
513 {FNAME("application") CHOICE, 4, 10, 14, DECODE | EXT, 513 {FNAME("application") CHOICE, 4, 10, 14, DECODE | EXT,
514 offsetof(DataApplicationCapability, application), 514 offsetof(DataApplicationCapability, application),
515 _DataApplicationCapability_application}, 515 _DataApplicationCapability_application},
516 {FNAME("maxBitRate") INT, CONS, 0, 0, SKIP, 0, NULL}, 516 {FNAME("maxBitRate") INT, CONS, 0, 0, SKIP, 0, NULL},
517}; 517};
518 518
519static field_t _EncryptionMode[] = { /* CHOICE */ 519static const struct field_t _EncryptionMode[] = { /* CHOICE */
520 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 520 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
521 _H245_NonStandardParameter}, 521 _H245_NonStandardParameter},
522 {FNAME("h233Encryption") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 522 {FNAME("h233Encryption") NUL, FIXD, 0, 0, SKIP, 0, NULL},
523}; 523};
524 524
525static field_t _DataType[] = { /* CHOICE */ 525static const struct field_t _DataType[] = { /* CHOICE */
526 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 526 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
527 _H245_NonStandardParameter}, 527 _H245_NonStandardParameter},
528 {FNAME("nullData") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 528 {FNAME("nullData") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -538,7 +538,7 @@ static field_t _DataType[] = { /* CHOICE */
538 {FNAME("multiplexedStream") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL}, 538 {FNAME("multiplexedStream") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
539}; 539};
540 540
541static field_t _H222LogicalChannelParameters[] = { /* SEQUENCE */ 541static const struct field_t _H222LogicalChannelParameters[] = { /* SEQUENCE */
542 {FNAME("resourceID") INT, WORD, 0, 0, SKIP, 0, NULL}, 542 {FNAME("resourceID") INT, WORD, 0, 0, SKIP, 0, NULL},
543 {FNAME("subChannelID") INT, WORD, 0, 0, SKIP, 0, NULL}, 543 {FNAME("subChannelID") INT, WORD, 0, 0, SKIP, 0, NULL},
544 {FNAME("pcr-pid") INT, WORD, 0, 0, SKIP | OPT, 0, NULL}, 544 {FNAME("pcr-pid") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
@@ -546,12 +546,12 @@ static field_t _H222LogicalChannelParameters[] = { /* SEQUENCE */
546 {FNAME("streamDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 546 {FNAME("streamDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL},
547}; 547};
548 548
549static field_t _H223LogicalChannelParameters_adaptationLayerType_al3[] = { /* SEQUENCE */ 549static const struct field_t _H223LogicalChannelParameters_adaptationLayerType_al3[] = { /* SEQUENCE */
550 {FNAME("controlFieldOctets") INT, 2, 0, 0, SKIP, 0, NULL}, 550 {FNAME("controlFieldOctets") INT, 2, 0, 0, SKIP, 0, NULL},
551 {FNAME("sendBufferSize") INT, CONS, 0, 0, SKIP, 0, NULL}, 551 {FNAME("sendBufferSize") INT, CONS, 0, 0, SKIP, 0, NULL},
552}; 552};
553 553
554static field_t _H223LogicalChannelParameters_adaptationLayerType[] = { /* CHOICE */ 554static const struct field_t _H223LogicalChannelParameters_adaptationLayerType[] = { /* CHOICE */
555 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, 555 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
556 _H245_NonStandardParameter}, 556 _H245_NonStandardParameter},
557 {FNAME("al1Framed") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 557 {FNAME("al1Framed") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -565,53 +565,53 @@ static field_t _H223LogicalChannelParameters_adaptationLayerType[] = { /* CHOICE
565 {FNAME("al3M") SEQ, 0, 5, 6, SKIP | EXT, 0, NULL}, 565 {FNAME("al3M") SEQ, 0, 5, 6, SKIP | EXT, 0, NULL},
566}; 566};
567 567
568static field_t _H223LogicalChannelParameters[] = { /* SEQUENCE */ 568static const struct field_t _H223LogicalChannelParameters[] = { /* SEQUENCE */
569 {FNAME("adaptationLayerType") CHOICE, 3, 6, 9, SKIP | EXT, 0, 569 {FNAME("adaptationLayerType") CHOICE, 3, 6, 9, SKIP | EXT, 0,
570 _H223LogicalChannelParameters_adaptationLayerType}, 570 _H223LogicalChannelParameters_adaptationLayerType},
571 {FNAME("segmentableFlag") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 571 {FNAME("segmentableFlag") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
572}; 572};
573 573
574static field_t _CRCLength[] = { /* CHOICE */ 574static const struct field_t _CRCLength[] = { /* CHOICE */
575 {FNAME("crc8bit") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 575 {FNAME("crc8bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
576 {FNAME("crc16bit") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 576 {FNAME("crc16bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
577 {FNAME("crc32bit") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 577 {FNAME("crc32bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
578}; 578};
579 579
580static field_t _V76HDLCParameters[] = { /* SEQUENCE */ 580static const struct field_t _V76HDLCParameters[] = { /* SEQUENCE */
581 {FNAME("crcLength") CHOICE, 2, 3, 3, SKIP | EXT, 0, _CRCLength}, 581 {FNAME("crcLength") CHOICE, 2, 3, 3, SKIP | EXT, 0, _CRCLength},
582 {FNAME("n401") INT, WORD, 1, 0, SKIP, 0, NULL}, 582 {FNAME("n401") INT, WORD, 1, 0, SKIP, 0, NULL},
583 {FNAME("loopbackTestProcedure") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 583 {FNAME("loopbackTestProcedure") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
584}; 584};
585 585
586static field_t _V76LogicalChannelParameters_suspendResume[] = { /* CHOICE */ 586static const struct field_t _V76LogicalChannelParameters_suspendResume[] = { /* CHOICE */
587 {FNAME("noSuspendResume") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 587 {FNAME("noSuspendResume") NUL, FIXD, 0, 0, SKIP, 0, NULL},
588 {FNAME("suspendResumewAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 588 {FNAME("suspendResumewAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
589 {FNAME("suspendResumewoAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 589 {FNAME("suspendResumewoAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
590}; 590};
591 591
592static field_t _V76LogicalChannelParameters_mode_eRM_recovery[] = { /* CHOICE */ 592static const struct field_t _V76LogicalChannelParameters_mode_eRM_recovery[] = { /* CHOICE */
593 {FNAME("rej") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 593 {FNAME("rej") NUL, FIXD, 0, 0, SKIP, 0, NULL},
594 {FNAME("sREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 594 {FNAME("sREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
595 {FNAME("mSREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 595 {FNAME("mSREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
596}; 596};
597 597
598static field_t _V76LogicalChannelParameters_mode_eRM[] = { /* SEQUENCE */ 598static const struct field_t _V76LogicalChannelParameters_mode_eRM[] = { /* SEQUENCE */
599 {FNAME("windowSize") INT, 7, 1, 0, SKIP, 0, NULL}, 599 {FNAME("windowSize") INT, 7, 1, 0, SKIP, 0, NULL},
600 {FNAME("recovery") CHOICE, 2, 3, 3, SKIP | EXT, 0, 600 {FNAME("recovery") CHOICE, 2, 3, 3, SKIP | EXT, 0,
601 _V76LogicalChannelParameters_mode_eRM_recovery}, 601 _V76LogicalChannelParameters_mode_eRM_recovery},
602}; 602};
603 603
604static field_t _V76LogicalChannelParameters_mode[] = { /* CHOICE */ 604static const struct field_t _V76LogicalChannelParameters_mode[] = { /* CHOICE */
605 {FNAME("eRM") SEQ, 0, 2, 2, SKIP | EXT, 0, 605 {FNAME("eRM") SEQ, 0, 2, 2, SKIP | EXT, 0,
606 _V76LogicalChannelParameters_mode_eRM}, 606 _V76LogicalChannelParameters_mode_eRM},
607 {FNAME("uNERM") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 607 {FNAME("uNERM") NUL, FIXD, 0, 0, SKIP, 0, NULL},
608}; 608};
609 609
610static field_t _V75Parameters[] = { /* SEQUENCE */ 610static const struct field_t _V75Parameters[] = { /* SEQUENCE */
611 {FNAME("audioHeaderPresent") BOOL, FIXD, 0, 0, SKIP, 0, NULL}, 611 {FNAME("audioHeaderPresent") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
612}; 612};
613 613
614static field_t _V76LogicalChannelParameters[] = { /* SEQUENCE */ 614static const struct field_t _V76LogicalChannelParameters[] = { /* SEQUENCE */
615 {FNAME("hdlcParameters") SEQ, 0, 3, 3, SKIP | EXT, 0, 615 {FNAME("hdlcParameters") SEQ, 0, 3, 3, SKIP | EXT, 0,
616 _V76HDLCParameters}, 616 _V76HDLCParameters},
617 {FNAME("suspendResume") CHOICE, 2, 3, 3, SKIP | EXT, 0, 617 {FNAME("suspendResume") CHOICE, 2, 3, 3, SKIP | EXT, 0,
@@ -622,38 +622,38 @@ static field_t _V76LogicalChannelParameters[] = { /* SEQUENCE */
622 {FNAME("v75Parameters") SEQ, 0, 1, 1, SKIP | EXT, 0, _V75Parameters}, 622 {FNAME("v75Parameters") SEQ, 0, 1, 1, SKIP | EXT, 0, _V75Parameters},
623}; 623};
624 624
625static field_t _H2250LogicalChannelParameters_nonStandard[] = { /* SEQUENCE OF */ 625static const struct field_t _H2250LogicalChannelParameters_nonStandard[] = { /* SEQUENCE OF */
626 {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter}, 626 {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
627}; 627};
628 628
629static field_t _UnicastAddress_iPAddress[] = { /* SEQUENCE */ 629static const struct field_t _UnicastAddress_iPAddress[] = { /* SEQUENCE */
630 {FNAME("network") OCTSTR, FIXD, 4, 0, DECODE, 630 {FNAME("network") OCTSTR, FIXD, 4, 0, DECODE,
631 offsetof(UnicastAddress_iPAddress, network), NULL}, 631 offsetof(UnicastAddress_iPAddress, network), NULL},
632 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL}, 632 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
633}; 633};
634 634
635static field_t _UnicastAddress_iPXAddress[] = { /* SEQUENCE */ 635static const struct field_t _UnicastAddress_iPXAddress[] = { /* SEQUENCE */
636 {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL}, 636 {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
637 {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 637 {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
638 {FNAME("tsapIdentifier") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL}, 638 {FNAME("tsapIdentifier") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
639}; 639};
640 640
641static field_t _UnicastAddress_iP6Address[] = { /* SEQUENCE */ 641static const struct field_t _UnicastAddress_iP6Address[] = { /* SEQUENCE */
642 {FNAME("network") OCTSTR, FIXD, 16, 0, DECODE, 642 {FNAME("network") OCTSTR, FIXD, 16, 0, DECODE,
643 offsetof(UnicastAddress_iP6Address, network), NULL}, 643 offsetof(UnicastAddress_iP6Address, network), NULL},
644 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL}, 644 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
645}; 645};
646 646
647static field_t _UnicastAddress_iPSourceRouteAddress_routing[] = { /* CHOICE */ 647static const struct field_t _UnicastAddress_iPSourceRouteAddress_routing[] = { /* CHOICE */
648 {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 648 {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
649 {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 649 {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
650}; 650};
651 651
652static field_t _UnicastAddress_iPSourceRouteAddress_route[] = { /* SEQUENCE OF */ 652static const struct field_t _UnicastAddress_iPSourceRouteAddress_route[] = { /* SEQUENCE OF */
653 {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 653 {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
654}; 654};
655 655
656static field_t _UnicastAddress_iPSourceRouteAddress[] = { /* SEQUENCE */ 656static const struct field_t _UnicastAddress_iPSourceRouteAddress[] = { /* SEQUENCE */
657 {FNAME("routing") CHOICE, 1, 2, 2, SKIP, 0, 657 {FNAME("routing") CHOICE, 1, 2, 2, SKIP, 0,
658 _UnicastAddress_iPSourceRouteAddress_routing}, 658 _UnicastAddress_iPSourceRouteAddress_routing},
659 {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 659 {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
@@ -662,7 +662,7 @@ static field_t _UnicastAddress_iPSourceRouteAddress[] = { /* SEQUENCE */
662 _UnicastAddress_iPSourceRouteAddress_route}, 662 _UnicastAddress_iPSourceRouteAddress_route},
663}; 663};
664 664
665static field_t _UnicastAddress[] = { /* CHOICE */ 665static const struct field_t _UnicastAddress[] = { /* CHOICE */
666 {FNAME("iPAddress") SEQ, 0, 2, 2, DECODE | EXT, 666 {FNAME("iPAddress") SEQ, 0, 2, 2, DECODE | EXT,
667 offsetof(UnicastAddress, iPAddress), _UnicastAddress_iPAddress}, 667 offsetof(UnicastAddress, iPAddress), _UnicastAddress_iPAddress},
668 {FNAME("iPXAddress") SEQ, 0, 3, 3, SKIP | EXT, 0, 668 {FNAME("iPXAddress") SEQ, 0, 3, 3, SKIP | EXT, 0,
@@ -676,17 +676,17 @@ static field_t _UnicastAddress[] = { /* CHOICE */
676 {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL}, 676 {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
677}; 677};
678 678
679static field_t _MulticastAddress_iPAddress[] = { /* SEQUENCE */ 679static const struct field_t _MulticastAddress_iPAddress[] = { /* SEQUENCE */
680 {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL}, 680 {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
681 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL}, 681 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
682}; 682};
683 683
684static field_t _MulticastAddress_iP6Address[] = { /* SEQUENCE */ 684static const struct field_t _MulticastAddress_iP6Address[] = { /* SEQUENCE */
685 {FNAME("network") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, 685 {FNAME("network") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
686 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL}, 686 {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
687}; 687};
688 688
689static field_t _MulticastAddress[] = { /* CHOICE */ 689static const struct field_t _MulticastAddress[] = { /* CHOICE */
690 {FNAME("iPAddress") SEQ, 0, 2, 2, SKIP | EXT, 0, 690 {FNAME("iPAddress") SEQ, 0, 2, 2, SKIP | EXT, 0,
691 _MulticastAddress_iPAddress}, 691 _MulticastAddress_iPAddress},
692 {FNAME("iP6Address") SEQ, 0, 2, 2, SKIP | EXT, 0, 692 {FNAME("iP6Address") SEQ, 0, 2, 2, SKIP | EXT, 0,
@@ -695,14 +695,14 @@ static field_t _MulticastAddress[] = { /* CHOICE */
695 {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL}, 695 {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
696}; 696};
697 697
698static field_t _H245_TransportAddress[] = { /* CHOICE */ 698static const struct field_t _H245_TransportAddress[] = { /* CHOICE */
699 {FNAME("unicastAddress") CHOICE, 3, 5, 7, DECODE | EXT, 699 {FNAME("unicastAddress") CHOICE, 3, 5, 7, DECODE | EXT,
700 offsetof(H245_TransportAddress, unicastAddress), _UnicastAddress}, 700 offsetof(H245_TransportAddress, unicastAddress), _UnicastAddress},
701 {FNAME("multicastAddress") CHOICE, 1, 2, 4, SKIP | EXT, 0, 701 {FNAME("multicastAddress") CHOICE, 1, 2, 4, SKIP | EXT, 0,
702 _MulticastAddress}, 702 _MulticastAddress},
703}; 703};
704 704
705static field_t _H2250LogicalChannelParameters[] = { /* SEQUENCE */ 705static const struct field_t _H2250LogicalChannelParameters[] = { /* SEQUENCE */
706 {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, 706 {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
707 _H2250LogicalChannelParameters_nonStandard}, 707 _H2250LogicalChannelParameters_nonStandard},
708 {FNAME("sessionID") INT, BYTE, 0, 0, SKIP, 0, NULL}, 708 {FNAME("sessionID") INT, BYTE, 0, 0, SKIP, 0, NULL},
@@ -728,7 +728,7 @@ static field_t _H2250LogicalChannelParameters[] = { /* SEQUENCE */
728 {FNAME("source") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0, NULL}, 728 {FNAME("source") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0, NULL},
729}; 729};
730 730
731static field_t _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */ 731static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
732 {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0, 732 {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
733 _H222LogicalChannelParameters}, 733 _H222LogicalChannelParameters},
734 {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0, 734 {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
@@ -742,7 +742,7 @@ static field_t _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexPara
742 {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 742 {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
743}; 743};
744 744
745static field_t _OpenLogicalChannel_forwardLogicalChannelParameters[] = { /* SEQUENCE */ 745static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters[] = { /* SEQUENCE */
746 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL}, 746 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
747 {FNAME("dataType") CHOICE, 3, 6, 9, DECODE | EXT, 747 {FNAME("dataType") CHOICE, 3, 6, 9, DECODE | EXT,
748 offsetof(OpenLogicalChannel_forwardLogicalChannelParameters, 748 offsetof(OpenLogicalChannel_forwardLogicalChannelParameters,
@@ -756,7 +756,7 @@ static field_t _OpenLogicalChannel_forwardLogicalChannelParameters[] = { /* SEQU
756 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL}, 756 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
757}; 757};
758 758
759static field_t _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */ 759static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
760 {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0, 760 {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
761 _H223LogicalChannelParameters}, 761 _H223LogicalChannelParameters},
762 {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0, 762 {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0,
@@ -767,7 +767,7 @@ static field_t _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexPara
767 h2250LogicalChannelParameters), _H2250LogicalChannelParameters}, 767 h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
768}; 768};
769 769
770static field_t _OpenLogicalChannel_reverseLogicalChannelParameters[] = { /* SEQUENCE */ 770static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters[] = { /* SEQUENCE */
771 {FNAME("dataType") CHOICE, 3, 6, 9, SKIP | EXT, 0, _DataType}, 771 {FNAME("dataType") CHOICE, 3, 6, 9, SKIP | EXT, 0, _DataType},
772 {FNAME("multiplexParameters") CHOICE, 1, 2, 3, DECODE | EXT | OPT, 772 {FNAME("multiplexParameters") CHOICE, 1, 2, 3, DECODE | EXT | OPT,
773 offsetof(OpenLogicalChannel_reverseLogicalChannelParameters, 773 offsetof(OpenLogicalChannel_reverseLogicalChannelParameters,
@@ -778,23 +778,23 @@ static field_t _OpenLogicalChannel_reverseLogicalChannelParameters[] = { /* SEQU
778 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL}, 778 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
779}; 779};
780 780
781static field_t _NetworkAccessParameters_distribution[] = { /* CHOICE */ 781static const struct field_t _NetworkAccessParameters_distribution[] = { /* CHOICE */
782 {FNAME("unicast") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 782 {FNAME("unicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
783 {FNAME("multicast") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 783 {FNAME("multicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
784}; 784};
785 785
786static field_t _Q2931Address_address[] = { /* CHOICE */ 786static const struct field_t _Q2931Address_address[] = { /* CHOICE */
787 {FNAME("internationalNumber") NUMSTR, 4, 1, 0, SKIP, 0, NULL}, 787 {FNAME("internationalNumber") NUMSTR, 4, 1, 0, SKIP, 0, NULL},
788 {FNAME("nsapAddress") OCTSTR, 5, 1, 0, SKIP, 0, NULL}, 788 {FNAME("nsapAddress") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
789}; 789};
790 790
791static field_t _Q2931Address[] = { /* SEQUENCE */ 791static const struct field_t _Q2931Address[] = { /* SEQUENCE */
792 {FNAME("address") CHOICE, 1, 2, 2, SKIP | EXT, 0, 792 {FNAME("address") CHOICE, 1, 2, 2, SKIP | EXT, 0,
793 _Q2931Address_address}, 793 _Q2931Address_address},
794 {FNAME("subaddress") OCTSTR, 5, 1, 0, SKIP | OPT, 0, NULL}, 794 {FNAME("subaddress") OCTSTR, 5, 1, 0, SKIP | OPT, 0, NULL},
795}; 795};
796 796
797static field_t _NetworkAccessParameters_networkAddress[] = { /* CHOICE */ 797static const struct field_t _NetworkAccessParameters_networkAddress[] = { /* CHOICE */
798 {FNAME("q2931Address") SEQ, 1, 2, 2, SKIP | EXT, 0, _Q2931Address}, 798 {FNAME("q2931Address") SEQ, 1, 2, 2, SKIP | EXT, 0, _Q2931Address},
799 {FNAME("e164Address") NUMDGT, 7, 1, 0, SKIP, 0, NULL}, 799 {FNAME("e164Address") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
800 {FNAME("localAreaAddress") CHOICE, 1, 2, 2, DECODE | EXT, 800 {FNAME("localAreaAddress") CHOICE, 1, 2, 2, DECODE | EXT,
@@ -802,7 +802,7 @@ static field_t _NetworkAccessParameters_networkAddress[] = { /* CHOICE */
802 _H245_TransportAddress}, 802 _H245_TransportAddress},
803}; 803};
804 804
805static field_t _NetworkAccessParameters[] = { /* SEQUENCE */ 805static const struct field_t _NetworkAccessParameters[] = { /* SEQUENCE */
806 {FNAME("distribution") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, 806 {FNAME("distribution") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0,
807 _NetworkAccessParameters_distribution}, 807 _NetworkAccessParameters_distribution},
808 {FNAME("networkAddress") CHOICE, 2, 3, 3, DECODE | EXT, 808 {FNAME("networkAddress") CHOICE, 2, 3, 3, DECODE | EXT,
@@ -814,7 +814,7 @@ static field_t _NetworkAccessParameters[] = { /* SEQUENCE */
814 NULL}, 814 NULL},
815}; 815};
816 816
817static field_t _OpenLogicalChannel[] = { /* SEQUENCE */ 817static const struct field_t _OpenLogicalChannel[] = { /* SEQUENCE */
818 {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL}, 818 {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
819 {FNAME("forwardLogicalChannelParameters") SEQ, 1, 3, 5, DECODE | EXT, 819 {FNAME("forwardLogicalChannelParameters") SEQ, 1, 3, 5, DECODE | EXT,
820 offsetof(OpenLogicalChannel, forwardLogicalChannelParameters), 820 offsetof(OpenLogicalChannel, forwardLogicalChannelParameters),
@@ -829,13 +829,13 @@ static field_t _OpenLogicalChannel[] = { /* SEQUENCE */
829 {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL}, 829 {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
830}; 830};
831 831
832static field_t _Setup_UUIE_fastStart[] = { /* SEQUENCE OF */ 832static const struct field_t _Setup_UUIE_fastStart[] = { /* SEQUENCE OF */
833 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 833 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
834 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 834 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
835 , 835 ,
836}; 836};
837 837
838static field_t _Setup_UUIE[] = { /* SEQUENCE */ 838static const struct field_t _Setup_UUIE[] = { /* SEQUENCE */
839 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 839 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
840 {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT, 840 {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
841 offsetof(Setup_UUIE, h245Address), _TransportAddress}, 841 offsetof(Setup_UUIE, h245Address), _TransportAddress},
@@ -894,13 +894,13 @@ static field_t _Setup_UUIE[] = { /* SEQUENCE */
894 NULL}, 894 NULL},
895}; 895};
896 896
897static field_t _CallProceeding_UUIE_fastStart[] = { /* SEQUENCE OF */ 897static const struct field_t _CallProceeding_UUIE_fastStart[] = { /* SEQUENCE OF */
898 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 898 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
899 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 899 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
900 , 900 ,
901}; 901};
902 902
903static field_t _CallProceeding_UUIE[] = { /* SEQUENCE */ 903static const struct field_t _CallProceeding_UUIE[] = { /* SEQUENCE */
904 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 904 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
905 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0, 905 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
906 _EndpointType}, 906 _EndpointType},
@@ -920,13 +920,13 @@ static field_t _CallProceeding_UUIE[] = { /* SEQUENCE */
920 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL}, 920 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
921}; 921};
922 922
923static field_t _Connect_UUIE_fastStart[] = { /* SEQUENCE OF */ 923static const struct field_t _Connect_UUIE_fastStart[] = { /* SEQUENCE OF */
924 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 924 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
925 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 925 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
926 , 926 ,
927}; 927};
928 928
929static field_t _Connect_UUIE[] = { /* SEQUENCE */ 929static const struct field_t _Connect_UUIE[] = { /* SEQUENCE */
930 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 930 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
931 {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT, 931 {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
932 offsetof(Connect_UUIE, h245Address), _TransportAddress}, 932 offsetof(Connect_UUIE, h245Address), _TransportAddress},
@@ -954,13 +954,13 @@ static field_t _Connect_UUIE[] = { /* SEQUENCE */
954 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL}, 954 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
955}; 955};
956 956
957static field_t _Alerting_UUIE_fastStart[] = { /* SEQUENCE OF */ 957static const struct field_t _Alerting_UUIE_fastStart[] = { /* SEQUENCE OF */
958 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 958 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
959 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 959 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
960 , 960 ,
961}; 961};
962 962
963static field_t _Alerting_UUIE[] = { /* SEQUENCE */ 963static const struct field_t _Alerting_UUIE[] = { /* SEQUENCE */
964 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 964 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
965 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0, 965 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
966 _EndpointType}, 966 _EndpointType},
@@ -986,7 +986,7 @@ static field_t _Alerting_UUIE[] = { /* SEQUENCE */
986 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL}, 986 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
987}; 987};
988 988
989static field_t _Information_UUIE[] = { /* SEQUENCE */ 989static const struct field_t _Information_UUIE[] = { /* SEQUENCE */
990 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 990 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
991 {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL}, 991 {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
992 {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, 992 {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
@@ -996,7 +996,7 @@ static field_t _Information_UUIE[] = { /* SEQUENCE */
996 {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL}, 996 {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
997}; 997};
998 998
999static field_t _ReleaseCompleteReason[] = { /* CHOICE */ 999static const struct field_t _ReleaseCompleteReason[] = { /* CHOICE */
1000 {FNAME("noBandwidth") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1000 {FNAME("noBandwidth") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1001 {FNAME("gatekeeperResources") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1001 {FNAME("gatekeeperResources") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1002 {FNAME("unreachableDestination") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1002 {FNAME("unreachableDestination") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -1022,7 +1022,7 @@ static field_t _ReleaseCompleteReason[] = { /* CHOICE */
1022 {FNAME("tunnelledSignallingRejected") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1022 {FNAME("tunnelledSignallingRejected") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1023}; 1023};
1024 1024
1025static field_t _ReleaseComplete_UUIE[] = { /* SEQUENCE */ 1025static const struct field_t _ReleaseComplete_UUIE[] = { /* SEQUENCE */
1026 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1026 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1027 {FNAME("reason") CHOICE, 4, 12, 22, SKIP | EXT | OPT, 0, 1027 {FNAME("reason") CHOICE, 4, 12, 22, SKIP | EXT | OPT, 0,
1028 _ReleaseCompleteReason}, 1028 _ReleaseCompleteReason},
@@ -1039,11 +1039,11 @@ static field_t _ReleaseComplete_UUIE[] = { /* SEQUENCE */
1039 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL}, 1039 {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
1040}; 1040};
1041 1041
1042static field_t _Facility_UUIE_alternativeAliasAddress[] = { /* SEQUENCE OF */ 1042static const struct field_t _Facility_UUIE_alternativeAliasAddress[] = { /* SEQUENCE OF */
1043 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1043 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1044}; 1044};
1045 1045
1046static field_t _FacilityReason[] = { /* CHOICE */ 1046static const struct field_t _FacilityReason[] = { /* CHOICE */
1047 {FNAME("routeCallToGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1047 {FNAME("routeCallToGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1048 {FNAME("callForwarded") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1048 {FNAME("callForwarded") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1049 {FNAME("routeCallToMC") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1049 {FNAME("routeCallToMC") NUL, FIXD, 0, 0, SKIP, 0, NULL},
@@ -1057,13 +1057,13 @@ static field_t _FacilityReason[] = { /* CHOICE */
1057 {FNAME("transportedInformation") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1057 {FNAME("transportedInformation") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1058}; 1058};
1059 1059
1060static field_t _Facility_UUIE_fastStart[] = { /* SEQUENCE OF */ 1060static const struct field_t _Facility_UUIE_fastStart[] = { /* SEQUENCE OF */
1061 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 1061 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
1062 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 1062 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
1063 , 1063 ,
1064}; 1064};
1065 1065
1066static field_t _Facility_UUIE[] = { /* SEQUENCE */ 1066static const struct field_t _Facility_UUIE[] = { /* SEQUENCE */
1067 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1067 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1068 {FNAME("alternativeAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT, 1068 {FNAME("alternativeAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
1069 offsetof(Facility_UUIE, alternativeAddress), _TransportAddress}, 1069 offsetof(Facility_UUIE, alternativeAddress), _TransportAddress},
@@ -1094,17 +1094,17 @@ static field_t _Facility_UUIE[] = { /* SEQUENCE */
1094 NULL}, 1094 NULL},
1095}; 1095};
1096 1096
1097static field_t _CallIdentifier[] = { /* SEQUENCE */ 1097static const struct field_t _CallIdentifier[] = { /* SEQUENCE */
1098 {FNAME("guid") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, 1098 {FNAME("guid") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
1099}; 1099};
1100 1100
1101static field_t _SecurityServiceMode[] = { /* CHOICE */ 1101static const struct field_t _SecurityServiceMode[] = { /* CHOICE */
1102 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter}, 1102 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
1103 {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1103 {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1104 {FNAME("default") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1104 {FNAME("default") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1105}; 1105};
1106 1106
1107static field_t _SecurityCapabilities[] = { /* SEQUENCE */ 1107static const struct field_t _SecurityCapabilities[] = { /* SEQUENCE */
1108 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0, 1108 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0,
1109 _NonStandardParameter}, 1109 _NonStandardParameter},
1110 {FNAME("encryption") CHOICE, 2, 3, 3, SKIP | EXT, 0, 1110 {FNAME("encryption") CHOICE, 2, 3, 3, SKIP | EXT, 0,
@@ -1115,30 +1115,30 @@ static field_t _SecurityCapabilities[] = { /* SEQUENCE */
1115 _SecurityServiceMode}, 1115 _SecurityServiceMode},
1116}; 1116};
1117 1117
1118static field_t _H245Security[] = { /* CHOICE */ 1118static const struct field_t _H245Security[] = { /* CHOICE */
1119 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter}, 1119 {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
1120 {FNAME("noSecurity") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1120 {FNAME("noSecurity") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1121 {FNAME("tls") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities}, 1121 {FNAME("tls") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
1122 {FNAME("ipsec") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities}, 1122 {FNAME("ipsec") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
1123}; 1123};
1124 1124
1125static field_t _DHset[] = { /* SEQUENCE */ 1125static const struct field_t _DHset[] = { /* SEQUENCE */
1126 {FNAME("halfkey") BITSTR, WORD, 0, 0, SKIP, 0, NULL}, 1126 {FNAME("halfkey") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
1127 {FNAME("modSize") BITSTR, WORD, 0, 0, SKIP, 0, NULL}, 1127 {FNAME("modSize") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
1128 {FNAME("generator") BITSTR, WORD, 0, 0, SKIP, 0, NULL}, 1128 {FNAME("generator") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
1129}; 1129};
1130 1130
1131static field_t _TypedCertificate[] = { /* SEQUENCE */ 1131static const struct field_t _TypedCertificate[] = { /* SEQUENCE */
1132 {FNAME("type") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1132 {FNAME("type") OID, BYTE, 0, 0, SKIP, 0, NULL},
1133 {FNAME("certificate") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1133 {FNAME("certificate") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1134}; 1134};
1135 1135
1136static field_t _H235_NonStandardParameter[] = { /* SEQUENCE */ 1136static const struct field_t _H235_NonStandardParameter[] = { /* SEQUENCE */
1137 {FNAME("nonStandardIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1137 {FNAME("nonStandardIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1138 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1138 {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1139}; 1139};
1140 1140
1141static field_t _ClearToken[] = { /* SEQUENCE */ 1141static const struct field_t _ClearToken[] = { /* SEQUENCE */
1142 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1142 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1143 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP | OPT, 0, NULL}, 1143 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP | OPT, 0, NULL},
1144 {FNAME("password") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL}, 1144 {FNAME("password") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
@@ -1154,120 +1154,120 @@ static field_t _ClearToken[] = { /* SEQUENCE */
1154 {FNAME("sendersID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL}, 1154 {FNAME("sendersID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
1155}; 1155};
1156 1156
1157static field_t _Progress_UUIE_tokens[] = { /* SEQUENCE OF */ 1157static const struct field_t _Progress_UUIE_tokens[] = { /* SEQUENCE OF */
1158 {FNAME("item") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken}, 1158 {FNAME("item") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
1159}; 1159};
1160 1160
1161static field_t _Params[] = { /* SEQUENCE */ 1161static const struct field_t _Params[] = { /* SEQUENCE */
1162 {FNAME("ranInt") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL}, 1162 {FNAME("ranInt") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL},
1163 {FNAME("iv8") OCTSTR, FIXD, 8, 0, SKIP | OPT, 0, NULL}, 1163 {FNAME("iv8") OCTSTR, FIXD, 8, 0, SKIP | OPT, 0, NULL},
1164 {FNAME("iv16") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL}, 1164 {FNAME("iv16") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL},
1165}; 1165};
1166 1166
1167static field_t _CryptoH323Token_cryptoEPPwdHash_token[] = { /* SEQUENCE */ 1167static const struct field_t _CryptoH323Token_cryptoEPPwdHash_token[] = { /* SEQUENCE */
1168 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1168 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1169 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1169 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1170 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1170 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1171}; 1171};
1172 1172
1173static field_t _CryptoH323Token_cryptoEPPwdHash[] = { /* SEQUENCE */ 1173static const struct field_t _CryptoH323Token_cryptoEPPwdHash[] = { /* SEQUENCE */
1174 {FNAME("alias") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1174 {FNAME("alias") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1175 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL}, 1175 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
1176 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0, 1176 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
1177 _CryptoH323Token_cryptoEPPwdHash_token}, 1177 _CryptoH323Token_cryptoEPPwdHash_token},
1178}; 1178};
1179 1179
1180static field_t _CryptoH323Token_cryptoGKPwdHash_token[] = { /* SEQUENCE */ 1180static const struct field_t _CryptoH323Token_cryptoGKPwdHash_token[] = { /* SEQUENCE */
1181 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1181 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1182 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1182 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1183 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1183 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1184}; 1184};
1185 1185
1186static field_t _CryptoH323Token_cryptoGKPwdHash[] = { /* SEQUENCE */ 1186static const struct field_t _CryptoH323Token_cryptoGKPwdHash[] = { /* SEQUENCE */
1187 {FNAME("gatekeeperId") BMPSTR, 7, 1, 0, SKIP, 0, NULL}, 1187 {FNAME("gatekeeperId") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
1188 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL}, 1188 {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
1189 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0, 1189 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
1190 _CryptoH323Token_cryptoGKPwdHash_token}, 1190 _CryptoH323Token_cryptoGKPwdHash_token},
1191}; 1191};
1192 1192
1193static field_t _CryptoH323Token_cryptoEPPwdEncr[] = { /* SEQUENCE */ 1193static const struct field_t _CryptoH323Token_cryptoEPPwdEncr[] = { /* SEQUENCE */
1194 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1194 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1195 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1195 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1196 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1196 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1197}; 1197};
1198 1198
1199static field_t _CryptoH323Token_cryptoGKPwdEncr[] = { /* SEQUENCE */ 1199static const struct field_t _CryptoH323Token_cryptoGKPwdEncr[] = { /* SEQUENCE */
1200 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1200 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1201 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1201 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1202 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1202 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1203}; 1203};
1204 1204
1205static field_t _CryptoH323Token_cryptoEPCert[] = { /* SEQUENCE */ 1205static const struct field_t _CryptoH323Token_cryptoEPCert[] = { /* SEQUENCE */
1206 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL}, 1206 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
1207 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1207 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1208 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1208 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1209 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1209 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1210}; 1210};
1211 1211
1212static field_t _CryptoH323Token_cryptoGKCert[] = { /* SEQUENCE */ 1212static const struct field_t _CryptoH323Token_cryptoGKCert[] = { /* SEQUENCE */
1213 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL}, 1213 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
1214 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1214 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1215 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1215 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1216 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1216 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1217}; 1217};
1218 1218
1219static field_t _CryptoH323Token_cryptoFastStart[] = { /* SEQUENCE */ 1219static const struct field_t _CryptoH323Token_cryptoFastStart[] = { /* SEQUENCE */
1220 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL}, 1220 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
1221 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1221 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1222 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1222 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1223 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1223 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1224}; 1224};
1225 1225
1226static field_t _CryptoToken_cryptoEncryptedToken_token[] = { /* SEQUENCE */ 1226static const struct field_t _CryptoToken_cryptoEncryptedToken_token[] = { /* SEQUENCE */
1227 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1227 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1228 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1228 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1229 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1229 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1230}; 1230};
1231 1231
1232static field_t _CryptoToken_cryptoEncryptedToken[] = { /* SEQUENCE */ 1232static const struct field_t _CryptoToken_cryptoEncryptedToken[] = { /* SEQUENCE */
1233 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1233 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1234 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0, 1234 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
1235 _CryptoToken_cryptoEncryptedToken_token}, 1235 _CryptoToken_cryptoEncryptedToken_token},
1236}; 1236};
1237 1237
1238static field_t _CryptoToken_cryptoSignedToken_token[] = { /* SEQUENCE */ 1238static const struct field_t _CryptoToken_cryptoSignedToken_token[] = { /* SEQUENCE */
1239 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL}, 1239 {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
1240 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1240 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1241 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1241 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1242 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1242 {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1243}; 1243};
1244 1244
1245static field_t _CryptoToken_cryptoSignedToken[] = { /* SEQUENCE */ 1245static const struct field_t _CryptoToken_cryptoSignedToken[] = { /* SEQUENCE */
1246 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1246 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1247 {FNAME("token") SEQ, 0, 4, 4, SKIP, 0, 1247 {FNAME("token") SEQ, 0, 4, 4, SKIP, 0,
1248 _CryptoToken_cryptoSignedToken_token}, 1248 _CryptoToken_cryptoSignedToken_token},
1249}; 1249};
1250 1250
1251static field_t _CryptoToken_cryptoHashedToken_token[] = { /* SEQUENCE */ 1251static const struct field_t _CryptoToken_cryptoHashedToken_token[] = { /* SEQUENCE */
1252 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1252 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1253 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1253 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1254 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1254 {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
1255}; 1255};
1256 1256
1257static field_t _CryptoToken_cryptoHashedToken[] = { /* SEQUENCE */ 1257static const struct field_t _CryptoToken_cryptoHashedToken[] = { /* SEQUENCE */
1258 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1258 {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1259 {FNAME("hashedVals") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken}, 1259 {FNAME("hashedVals") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
1260 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0, 1260 {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
1261 _CryptoToken_cryptoHashedToken_token}, 1261 _CryptoToken_cryptoHashedToken_token},
1262}; 1262};
1263 1263
1264static field_t _CryptoToken_cryptoPwdEncr[] = { /* SEQUENCE */ 1264static const struct field_t _CryptoToken_cryptoPwdEncr[] = { /* SEQUENCE */
1265 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1265 {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
1266 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params}, 1266 {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
1267 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL}, 1267 {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
1268}; 1268};
1269 1269
1270static field_t _CryptoToken[] = { /* CHOICE */ 1270static const struct field_t _CryptoToken[] = { /* CHOICE */
1271 {FNAME("cryptoEncryptedToken") SEQ, 0, 2, 2, SKIP, 0, 1271 {FNAME("cryptoEncryptedToken") SEQ, 0, 2, 2, SKIP, 0,
1272 _CryptoToken_cryptoEncryptedToken}, 1272 _CryptoToken_cryptoEncryptedToken},
1273 {FNAME("cryptoSignedToken") SEQ, 0, 2, 2, SKIP, 0, 1273 {FNAME("cryptoSignedToken") SEQ, 0, 2, 2, SKIP, 0,
@@ -1278,7 +1278,7 @@ static field_t _CryptoToken[] = { /* CHOICE */
1278 _CryptoToken_cryptoPwdEncr}, 1278 _CryptoToken_cryptoPwdEncr},
1279}; 1279};
1280 1280
1281static field_t _CryptoH323Token[] = { /* CHOICE */ 1281static const struct field_t _CryptoH323Token[] = { /* CHOICE */
1282 {FNAME("cryptoEPPwdHash") SEQ, 0, 3, 3, SKIP, 0, 1282 {FNAME("cryptoEPPwdHash") SEQ, 0, 3, 3, SKIP, 0,
1283 _CryptoH323Token_cryptoEPPwdHash}, 1283 _CryptoH323Token_cryptoEPPwdHash},
1284 {FNAME("cryptoGKPwdHash") SEQ, 0, 3, 3, SKIP, 0, 1284 {FNAME("cryptoGKPwdHash") SEQ, 0, 3, 3, SKIP, 0,
@@ -1297,17 +1297,17 @@ static field_t _CryptoH323Token[] = { /* CHOICE */
1297 _CryptoToken}, 1297 _CryptoToken},
1298}; 1298};
1299 1299
1300static field_t _Progress_UUIE_cryptoTokens[] = { /* SEQUENCE OF */ 1300static const struct field_t _Progress_UUIE_cryptoTokens[] = { /* SEQUENCE OF */
1301 {FNAME("item") CHOICE, 3, 8, 8, SKIP | EXT, 0, _CryptoH323Token}, 1301 {FNAME("item") CHOICE, 3, 8, 8, SKIP | EXT, 0, _CryptoH323Token},
1302}; 1302};
1303 1303
1304static field_t _Progress_UUIE_fastStart[] = { /* SEQUENCE OF */ 1304static const struct field_t _Progress_UUIE_fastStart[] = { /* SEQUENCE OF */
1305 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, 1305 {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
1306 sizeof(OpenLogicalChannel), _OpenLogicalChannel} 1306 sizeof(OpenLogicalChannel), _OpenLogicalChannel}
1307 , 1307 ,
1308}; 1308};
1309 1309
1310static field_t _Progress_UUIE[] = { /* SEQUENCE */ 1310static const struct field_t _Progress_UUIE[] = { /* SEQUENCE */
1311 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1311 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1312 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0, 1312 {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
1313 _EndpointType}, 1313 _EndpointType},
@@ -1328,7 +1328,7 @@ static field_t _Progress_UUIE[] = { /* SEQUENCE */
1328 {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL}, 1328 {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
1329}; 1329};
1330 1330
1331static field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE */ 1331static const struct field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE */
1332 {FNAME("setup") SEQ, 7, 13, 39, DECODE | EXT, 1332 {FNAME("setup") SEQ, 7, 13, 39, DECODE | EXT,
1333 offsetof(H323_UU_PDU_h323_message_body, setup), _Setup_UUIE}, 1333 offsetof(H323_UU_PDU_h323_message_body, setup), _Setup_UUIE},
1334 {FNAME("callProceeding") SEQ, 1, 3, 12, DECODE | EXT, 1334 {FNAME("callProceeding") SEQ, 1, 3, 12, DECODE | EXT,
@@ -1352,7 +1352,7 @@ static field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE */
1352 {FNAME("notify") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL}, 1352 {FNAME("notify") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
1353}; 1353};
1354 1354
1355static field_t _RequestMessage[] = { /* CHOICE */ 1355static const struct field_t _RequestMessage[] = { /* CHOICE */
1356 {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL}, 1356 {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
1357 {FNAME("masterSlaveDetermination") SEQ, 0, 2, 2, STOP | EXT, 0, NULL}, 1357 {FNAME("masterSlaveDetermination") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
1358 {FNAME("terminalCapabilitySet") SEQ, 3, 5, 5, STOP | EXT, 0, NULL}, 1358 {FNAME("terminalCapabilitySet") SEQ, 3, 5, 5, STOP | EXT, 0, NULL},
@@ -1372,7 +1372,7 @@ static field_t _RequestMessage[] = { /* CHOICE */
1372 NULL}, 1372 NULL},
1373}; 1373};
1374 1374
1375static field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */ 1375static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
1376 {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0, 1376 {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
1377 _H222LogicalChannelParameters}, 1377 _H222LogicalChannelParameters},
1378 {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT, 1378 {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
@@ -1381,7 +1381,7 @@ static field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexP
1381 h2250LogicalChannelParameters), _H2250LogicalChannelParameters}, 1381 h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
1382}; 1382};
1383 1383
1384static field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters[] = { /* SEQUENCE */ 1384static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters[] = { /* SEQUENCE */
1385 {FNAME("reverseLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL}, 1385 {FNAME("reverseLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
1386 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL}, 1386 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
1387 {FNAME("multiplexParameters") CHOICE, 0, 1, 2, DECODE | EXT | OPT, 1387 {FNAME("multiplexParameters") CHOICE, 0, 1, 2, DECODE | EXT | OPT,
@@ -1391,11 +1391,11 @@ static field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters[] = { /* S
1391 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL}, 1391 {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
1392}; 1392};
1393 1393
1394static field_t _H2250LogicalChannelAckParameters_nonStandard[] = { /* SEQUENCE OF */ 1394static const struct field_t _H2250LogicalChannelAckParameters_nonStandard[] = { /* SEQUENCE OF */
1395 {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter}, 1395 {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
1396}; 1396};
1397 1397
1398static field_t _H2250LogicalChannelAckParameters[] = { /* SEQUENCE */ 1398static const struct field_t _H2250LogicalChannelAckParameters[] = { /* SEQUENCE */
1399 {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, 1399 {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
1400 _H2250LogicalChannelAckParameters_nonStandard}, 1400 _H2250LogicalChannelAckParameters_nonStandard},
1401 {FNAME("sessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL}, 1401 {FNAME("sessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL},
@@ -1410,14 +1410,14 @@ static field_t _H2250LogicalChannelAckParameters[] = { /* SEQUENCE */
1410 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL}, 1410 {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
1411}; 1411};
1412 1412
1413static field_t _OpenLogicalChannelAck_forwardMultiplexAckParameters[] = { /* CHOICE */ 1413static const struct field_t _OpenLogicalChannelAck_forwardMultiplexAckParameters[] = { /* CHOICE */
1414 {FNAME("h2250LogicalChannelAckParameters") SEQ, 5, 5, 7, DECODE | EXT, 1414 {FNAME("h2250LogicalChannelAckParameters") SEQ, 5, 5, 7, DECODE | EXT,
1415 offsetof(OpenLogicalChannelAck_forwardMultiplexAckParameters, 1415 offsetof(OpenLogicalChannelAck_forwardMultiplexAckParameters,
1416 h2250LogicalChannelAckParameters), 1416 h2250LogicalChannelAckParameters),
1417 _H2250LogicalChannelAckParameters}, 1417 _H2250LogicalChannelAckParameters},
1418}; 1418};
1419 1419
1420static field_t _OpenLogicalChannelAck[] = { /* SEQUENCE */ 1420static const struct field_t _OpenLogicalChannelAck[] = { /* SEQUENCE */
1421 {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL}, 1421 {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
1422 {FNAME("reverseLogicalChannelParameters") SEQ, 2, 3, 4, 1422 {FNAME("reverseLogicalChannelParameters") SEQ, 2, 3, 4,
1423 DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck, 1423 DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck,
@@ -1433,7 +1433,7 @@ static field_t _OpenLogicalChannelAck[] = { /* SEQUENCE */
1433 {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL}, 1433 {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
1434}; 1434};
1435 1435
1436static field_t _ResponseMessage[] = { /* CHOICE */ 1436static const struct field_t _ResponseMessage[] = { /* CHOICE */
1437 {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL}, 1437 {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
1438 {FNAME("masterSlaveDeterminationAck") SEQ, 0, 1, 1, STOP | EXT, 0, 1438 {FNAME("masterSlaveDeterminationAck") SEQ, 0, 1, 1, STOP | EXT, 0,
1439 NULL}, 1439 NULL},
@@ -1469,7 +1469,7 @@ static field_t _ResponseMessage[] = { /* CHOICE */
1469 {FNAME("logicalChannelRateReject") SEQ, 1, 4, 4, STOP | EXT, 0, NULL}, 1469 {FNAME("logicalChannelRateReject") SEQ, 1, 4, 4, STOP | EXT, 0, NULL},
1470}; 1470};
1471 1471
1472static field_t _MultimediaSystemControlMessage[] = { /* CHOICE */ 1472static const struct field_t _MultimediaSystemControlMessage[] = { /* CHOICE */
1473 {FNAME("request") CHOICE, 4, 11, 15, DECODE | EXT, 1473 {FNAME("request") CHOICE, 4, 11, 15, DECODE | EXT,
1474 offsetof(MultimediaSystemControlMessage, request), _RequestMessage}, 1474 offsetof(MultimediaSystemControlMessage, request), _RequestMessage},
1475 {FNAME("response") CHOICE, 5, 19, 24, DECODE | EXT, 1475 {FNAME("response") CHOICE, 5, 19, 24, DECODE | EXT,
@@ -1479,14 +1479,14 @@ static field_t _MultimediaSystemControlMessage[] = { /* CHOICE */
1479 {FNAME("indication") CHOICE, 4, 14, 23, STOP | EXT, 0, NULL}, 1479 {FNAME("indication") CHOICE, 4, 14, 23, STOP | EXT, 0, NULL},
1480}; 1480};
1481 1481
1482static field_t _H323_UU_PDU_h245Control[] = { /* SEQUENCE OF */ 1482static const struct field_t _H323_UU_PDU_h245Control[] = { /* SEQUENCE OF */
1483 {FNAME("item") CHOICE, 2, 4, 4, DECODE | OPEN | EXT, 1483 {FNAME("item") CHOICE, 2, 4, 4, DECODE | OPEN | EXT,
1484 sizeof(MultimediaSystemControlMessage), 1484 sizeof(MultimediaSystemControlMessage),
1485 _MultimediaSystemControlMessage} 1485 _MultimediaSystemControlMessage}
1486 , 1486 ,
1487}; 1487};
1488 1488
1489static field_t _H323_UU_PDU[] = { /* SEQUENCE */ 1489static const struct field_t _H323_UU_PDU[] = { /* SEQUENCE */
1490 {FNAME("h323-message-body") CHOICE, 3, 7, 13, DECODE | EXT, 1490 {FNAME("h323-message-body") CHOICE, 3, 7, 13, DECODE | EXT,
1491 offsetof(H323_UU_PDU, h323_message_body), 1491 offsetof(H323_UU_PDU, h323_message_body),
1492 _H323_UU_PDU_h323_message_body}, 1492 _H323_UU_PDU_h323_message_body},
@@ -1507,13 +1507,13 @@ static field_t _H323_UU_PDU[] = { /* SEQUENCE */
1507 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1507 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1508}; 1508};
1509 1509
1510static field_t _H323_UserInformation[] = { /* SEQUENCE */ 1510static const struct field_t _H323_UserInformation[] = { /* SEQUENCE */
1511 {FNAME("h323-uu-pdu") SEQ, 1, 2, 11, DECODE | EXT, 1511 {FNAME("h323-uu-pdu") SEQ, 1, 2, 11, DECODE | EXT,
1512 offsetof(H323_UserInformation, h323_uu_pdu), _H323_UU_PDU}, 1512 offsetof(H323_UserInformation, h323_uu_pdu), _H323_UU_PDU},
1513 {FNAME("user-data") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL}, 1513 {FNAME("user-data") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL},
1514}; 1514};
1515 1515
1516static field_t _GatekeeperRequest[] = { /* SEQUENCE */ 1516static const struct field_t _GatekeeperRequest[] = { /* SEQUENCE */
1517 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1517 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1518 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1518 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1519 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 1519 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
@@ -1537,7 +1537,7 @@ static field_t _GatekeeperRequest[] = { /* SEQUENCE */
1537 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1537 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1538}; 1538};
1539 1539
1540static field_t _GatekeeperConfirm[] = { /* SEQUENCE */ 1540static const struct field_t _GatekeeperConfirm[] = { /* SEQUENCE */
1541 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1541 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1542 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1542 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1543 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 1543 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
@@ -1557,23 +1557,23 @@ static field_t _GatekeeperConfirm[] = { /* SEQUENCE */
1557 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1557 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1558}; 1558};
1559 1559
1560static field_t _RegistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */ 1560static const struct field_t _RegistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */
1561 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT, 1561 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
1562 sizeof(TransportAddress), _TransportAddress} 1562 sizeof(TransportAddress), _TransportAddress}
1563 , 1563 ,
1564}; 1564};
1565 1565
1566static field_t _RegistrationRequest_rasAddress[] = { /* SEQUENCE OF */ 1566static const struct field_t _RegistrationRequest_rasAddress[] = { /* SEQUENCE OF */
1567 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT, 1567 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
1568 sizeof(TransportAddress), _TransportAddress} 1568 sizeof(TransportAddress), _TransportAddress}
1569 , 1569 ,
1570}; 1570};
1571 1571
1572static field_t _RegistrationRequest_terminalAlias[] = { /* SEQUENCE OF */ 1572static const struct field_t _RegistrationRequest_terminalAlias[] = { /* SEQUENCE OF */
1573 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1573 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1574}; 1574};
1575 1575
1576static field_t _RegistrationRequest[] = { /* SEQUENCE */ 1576static const struct field_t _RegistrationRequest[] = { /* SEQUENCE */
1577 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1577 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1578 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1578 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1579 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 1579 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
@@ -1621,17 +1621,17 @@ static field_t _RegistrationRequest[] = { /* SEQUENCE */
1621 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1621 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1622}; 1622};
1623 1623
1624static field_t _RegistrationConfirm_callSignalAddress[] = { /* SEQUENCE OF */ 1624static const struct field_t _RegistrationConfirm_callSignalAddress[] = { /* SEQUENCE OF */
1625 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT, 1625 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
1626 sizeof(TransportAddress), _TransportAddress} 1626 sizeof(TransportAddress), _TransportAddress}
1627 , 1627 ,
1628}; 1628};
1629 1629
1630static field_t _RegistrationConfirm_terminalAlias[] = { /* SEQUENCE OF */ 1630static const struct field_t _RegistrationConfirm_terminalAlias[] = { /* SEQUENCE OF */
1631 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1631 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1632}; 1632};
1633 1633
1634static field_t _RegistrationConfirm[] = { /* SEQUENCE */ 1634static const struct field_t _RegistrationConfirm[] = { /* SEQUENCE */
1635 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1635 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1636 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, 1636 {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
1637 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 1637 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
@@ -1667,13 +1667,13 @@ static field_t _RegistrationConfirm[] = { /* SEQUENCE */
1667 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1667 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1668}; 1668};
1669 1669
1670static field_t _UnregistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */ 1670static const struct field_t _UnregistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */
1671 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT, 1671 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
1672 sizeof(TransportAddress), _TransportAddress} 1672 sizeof(TransportAddress), _TransportAddress}
1673 , 1673 ,
1674}; 1674};
1675 1675
1676static field_t _UnregistrationRequest[] = { /* SEQUENCE */ 1676static const struct field_t _UnregistrationRequest[] = { /* SEQUENCE */
1677 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1677 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1678 {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE, 1678 {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
1679 offsetof(UnregistrationRequest, callSignalAddress), 1679 offsetof(UnregistrationRequest, callSignalAddress),
@@ -1694,24 +1694,24 @@ static field_t _UnregistrationRequest[] = { /* SEQUENCE */
1694 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1694 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1695}; 1695};
1696 1696
1697static field_t _CallModel[] = { /* CHOICE */ 1697static const struct field_t _CallModel[] = { /* CHOICE */
1698 {FNAME("direct") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1698 {FNAME("direct") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1699 {FNAME("gatekeeperRouted") NUL, FIXD, 0, 0, SKIP, 0, NULL}, 1699 {FNAME("gatekeeperRouted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
1700}; 1700};
1701 1701
1702static field_t _AdmissionRequest_destinationInfo[] = { /* SEQUENCE OF */ 1702static const struct field_t _AdmissionRequest_destinationInfo[] = { /* SEQUENCE OF */
1703 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1703 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1704}; 1704};
1705 1705
1706static field_t _AdmissionRequest_destExtraCallInfo[] = { /* SEQUENCE OF */ 1706static const struct field_t _AdmissionRequest_destExtraCallInfo[] = { /* SEQUENCE OF */
1707 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1707 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1708}; 1708};
1709 1709
1710static field_t _AdmissionRequest_srcInfo[] = { /* SEQUENCE OF */ 1710static const struct field_t _AdmissionRequest_srcInfo[] = { /* SEQUENCE OF */
1711 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1711 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1712}; 1712};
1713 1713
1714static field_t _AdmissionRequest[] = { /* SEQUENCE */ 1714static const struct field_t _AdmissionRequest[] = { /* SEQUENCE */
1715 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1715 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1716 {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType}, 1716 {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType},
1717 {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, _CallModel}, 1717 {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, _CallModel},
@@ -1755,7 +1755,7 @@ static field_t _AdmissionRequest[] = { /* SEQUENCE */
1755 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1755 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1756}; 1756};
1757 1757
1758static field_t _AdmissionConfirm[] = { /* SEQUENCE */ 1758static const struct field_t _AdmissionConfirm[] = { /* SEQUENCE */
1759 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1759 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1760 {FNAME("bandWidth") INT, CONS, 0, 0, SKIP, 0, NULL}, 1760 {FNAME("bandWidth") INT, CONS, 0, 0, SKIP, 0, NULL},
1761 {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT, 0, _CallModel}, 1761 {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT, 0, _CallModel},
@@ -1790,11 +1790,11 @@ static field_t _AdmissionConfirm[] = { /* SEQUENCE */
1790 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1790 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1791}; 1791};
1792 1792
1793static field_t _LocationRequest_destinationInfo[] = { /* SEQUENCE OF */ 1793static const struct field_t _LocationRequest_destinationInfo[] = { /* SEQUENCE OF */
1794 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress}, 1794 {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
1795}; 1795};
1796 1796
1797static field_t _LocationRequest[] = { /* SEQUENCE */ 1797static const struct field_t _LocationRequest[] = { /* SEQUENCE */
1798 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1798 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1799 {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL}, 1799 {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
1800 {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP, 0, 1800 {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP, 0,
@@ -1818,7 +1818,7 @@ static field_t _LocationRequest[] = { /* SEQUENCE */
1818 {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL}, 1818 {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
1819}; 1819};
1820 1820
1821static field_t _LocationConfirm[] = { /* SEQUENCE */ 1821static const struct field_t _LocationConfirm[] = { /* SEQUENCE */
1822 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1822 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
1823 {FNAME("callSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT, 1823 {FNAME("callSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT,
1824 offsetof(LocationConfirm, callSignalAddress), _TransportAddress}, 1824 offsetof(LocationConfirm, callSignalAddress), _TransportAddress},
@@ -1844,13 +1844,13 @@ static field_t _LocationConfirm[] = { /* SEQUENCE */
1844 {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1844 {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1845}; 1845};
1846 1846
1847static field_t _InfoRequestResponse_callSignalAddress[] = { /* SEQUENCE OF */ 1847static const struct field_t _InfoRequestResponse_callSignalAddress[] = { /* SEQUENCE OF */
1848 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT, 1848 {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
1849 sizeof(TransportAddress), _TransportAddress} 1849 sizeof(TransportAddress), _TransportAddress}
1850 , 1850 ,
1851}; 1851};
1852 1852
1853static field_t _InfoRequestResponse[] = { /* SEQUENCE */ 1853static const struct field_t _InfoRequestResponse[] = { /* SEQUENCE */
1854 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0, 1854 {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
1855 _NonStandardParameter}, 1855 _NonStandardParameter},
1856 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL}, 1856 {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
@@ -1873,7 +1873,7 @@ static field_t _InfoRequestResponse[] = { /* SEQUENCE */
1873 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL}, 1873 {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
1874}; 1874};
1875 1875
1876static field_t _RasMessage[] = { /* CHOICE */ 1876static const struct field_t _RasMessage[] = { /* CHOICE */
1877 {FNAME("gatekeeperRequest") SEQ, 4, 8, 18, DECODE | EXT, 1877 {FNAME("gatekeeperRequest") SEQ, 4, 8, 18, DECODE | EXT,
1878 offsetof(RasMessage, gatekeeperRequest), _GatekeeperRequest}, 1878 offsetof(RasMessage, gatekeeperRequest), _GatekeeperRequest},
1879 {FNAME("gatekeeperConfirm") SEQ, 2, 5, 14, DECODE | EXT, 1879 {FNAME("gatekeeperConfirm") SEQ, 2, 5, 14, DECODE | EXT,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 96aa637c0932..b1fd21cc1dbc 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -28,6 +28,7 @@
28#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_extend.h> 29#include <net/netfilter/nf_conntrack_extend.h>
30 30
31static DEFINE_MUTEX(nf_ct_helper_mutex);
31static struct hlist_head *nf_ct_helper_hash __read_mostly; 32static struct hlist_head *nf_ct_helper_hash __read_mostly;
32static unsigned int nf_ct_helper_hsize __read_mostly; 33static unsigned int nf_ct_helper_hsize __read_mostly;
33static unsigned int nf_ct_helper_count __read_mostly; 34static unsigned int nf_ct_helper_count __read_mostly;
@@ -54,42 +55,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
54 return NULL; 55 return NULL;
55 56
56 h = helper_hash(tuple); 57 h = helper_hash(tuple);
57 hlist_for_each_entry(helper, n, &nf_ct_helper_hash[h], hnode) { 58 hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) {
58 if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) 59 if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask))
59 return helper; 60 return helper;
60 } 61 }
61 return NULL; 62 return NULL;
62} 63}
63 64EXPORT_SYMBOL_GPL(__nf_ct_helper_find);
64struct nf_conntrack_helper *
65nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
66{
67 struct nf_conntrack_helper *helper;
68
69 /* need nf_conntrack_lock to assure that helper exists until
70 * try_module_get() is called */
71 read_lock_bh(&nf_conntrack_lock);
72
73 helper = __nf_ct_helper_find(tuple);
74 if (helper) {
75 /* need to increase module usage count to assure helper will
76 * not go away while the caller is e.g. busy putting a
77 * conntrack in the hash that uses the helper */
78 if (!try_module_get(helper->me))
79 helper = NULL;
80 }
81
82 read_unlock_bh(&nf_conntrack_lock);
83
84 return helper;
85}
86EXPORT_SYMBOL_GPL(nf_ct_helper_find_get);
87
88void nf_ct_helper_put(struct nf_conntrack_helper *helper)
89{
90 module_put(helper->me);
91}
92EXPORT_SYMBOL_GPL(nf_ct_helper_put);
93 65
94struct nf_conntrack_helper * 66struct nf_conntrack_helper *
95__nf_conntrack_helper_find_byname(const char *name) 67__nf_conntrack_helper_find_byname(const char *name)
@@ -99,7 +71,7 @@ __nf_conntrack_helper_find_byname(const char *name)
99 unsigned int i; 71 unsigned int i;
100 72
101 for (i = 0; i < nf_ct_helper_hsize; i++) { 73 for (i = 0; i < nf_ct_helper_hsize; i++) {
102 hlist_for_each_entry(h, n, &nf_ct_helper_hash[i], hnode) { 74 hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) {
103 if (!strcmp(h->name, name)) 75 if (!strcmp(h->name, name))
104 return h; 76 return h;
105 } 77 }
@@ -140,10 +112,10 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
140 112
141 BUG_ON(me->timeout == 0); 113 BUG_ON(me->timeout == 0);
142 114
143 write_lock_bh(&nf_conntrack_lock); 115 mutex_lock(&nf_ct_helper_mutex);
144 hlist_add_head(&me->hnode, &nf_ct_helper_hash[h]); 116 hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
145 nf_ct_helper_count++; 117 nf_ct_helper_count++;
146 write_unlock_bh(&nf_conntrack_lock); 118 mutex_unlock(&nf_ct_helper_mutex);
147 119
148 return 0; 120 return 0;
149} 121}
@@ -156,10 +128,17 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
156 struct hlist_node *n, *next; 128 struct hlist_node *n, *next;
157 unsigned int i; 129 unsigned int i;
158 130
159 /* Need write lock here, to delete helper. */ 131 mutex_lock(&nf_ct_helper_mutex);
160 write_lock_bh(&nf_conntrack_lock); 132 hlist_del_rcu(&me->hnode);
161 hlist_del(&me->hnode);
162 nf_ct_helper_count--; 133 nf_ct_helper_count--;
134 mutex_unlock(&nf_ct_helper_mutex);
135
136 /* Make sure every nothing is still using the helper unless its a
137 * connection in the hash.
138 */
139 synchronize_rcu();
140
141 spin_lock_bh(&nf_conntrack_lock);
163 142
164 /* Get rid of expectations */ 143 /* Get rid of expectations */
165 for (i = 0; i < nf_ct_expect_hsize; i++) { 144 for (i = 0; i < nf_ct_expect_hsize; i++) {
@@ -181,10 +160,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
181 hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) 160 hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode)
182 unhelp(h, me); 161 unhelp(h, me);
183 } 162 }
184 write_unlock_bh(&nf_conntrack_lock); 163 spin_unlock_bh(&nf_conntrack_lock);
185
186 /* Someone could be still looking at the helper in a bh. */
187 synchronize_net();
188} 164}
189EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); 165EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
190 166
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index dfaed4ba83cd..c336b07a0d4c 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -23,7 +23,7 @@
23 23
24#define MAX_PORTS 8 24#define MAX_PORTS 8
25static unsigned short ports[MAX_PORTS]; 25static unsigned short ports[MAX_PORTS];
26static int ports_c; 26static unsigned int ports_c;
27static unsigned int max_dcc_channels = 8; 27static unsigned int max_dcc_channels = 8;
28static unsigned int dcc_timeout __read_mostly = 300; 28static unsigned int dcc_timeout __read_mostly = 300;
29/* This is slow, but it's simple. --RR */ 29/* This is slow, but it's simple. --RR */
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 991c52c9a28b..8e914e5ffea8 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -55,12 +55,6 @@ static int generic_print_tuple(struct seq_file *s,
55 return 0; 55 return 0;
56} 56}
57 57
58static int generic_print_conntrack(struct seq_file *s,
59 const struct nf_conn *conntrack)
60{
61 return 0;
62}
63
64static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 58static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
65 unsigned int *dataoff, u_int8_t *protonum) 59 unsigned int *dataoff, u_int8_t *protonum)
66{ 60{
@@ -75,7 +69,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
75 .pkt_to_tuple = generic_pkt_to_tuple, 69 .pkt_to_tuple = generic_pkt_to_tuple,
76 .invert_tuple = generic_invert_tuple, 70 .invert_tuple = generic_invert_tuple,
77 .print_tuple = generic_print_tuple, 71 .print_tuple = generic_print_tuple,
78 .print_conntrack = generic_print_conntrack,
79 .get_l4proto = generic_get_l4proto, 72 .get_l4proto = generic_get_l4proto,
80}; 73};
81EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); 74EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9be1826e6cdd..4a1b42b2b7a5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -59,7 +59,7 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
59 nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); 59 nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED);
60 if (!nest_parms) 60 if (!nest_parms)
61 goto nla_put_failure; 61 goto nla_put_failure;
62 NLA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); 62 NLA_PUT_U8(skb, CTA_PROTO_NUM, tuple->dst.protonum);
63 63
64 if (likely(l4proto->tuple_to_nlattr)) 64 if (likely(l4proto->tuple_to_nlattr))
65 ret = l4proto->tuple_to_nlattr(skb, tuple); 65 ret = l4proto->tuple_to_nlattr(skb, tuple);
@@ -95,7 +95,7 @@ nla_put_failure:
95 return -1; 95 return -1;
96} 96}
97 97
98static inline int 98static int
99ctnetlink_dump_tuples(struct sk_buff *skb, 99ctnetlink_dump_tuples(struct sk_buff *skb,
100 const struct nf_conntrack_tuple *tuple) 100 const struct nf_conntrack_tuple *tuple)
101{ 101{
@@ -120,8 +120,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
120static inline int 120static inline int
121ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) 121ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
122{ 122{
123 __be32 status = htonl((u_int32_t) ct->status); 123 NLA_PUT_BE32(skb, CTA_STATUS, htonl(ct->status));
124 NLA_PUT(skb, CTA_STATUS, sizeof(status), &status);
125 return 0; 124 return 0;
126 125
127nla_put_failure: 126nla_put_failure:
@@ -131,15 +130,12 @@ nla_put_failure:
131static inline int 130static inline int
132ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) 131ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
133{ 132{
134 long timeout_l = ct->timeout.expires - jiffies; 133 long timeout = (ct->timeout.expires - jiffies) / HZ;
135 __be32 timeout;
136 134
137 if (timeout_l < 0) 135 if (timeout < 0)
138 timeout = 0; 136 timeout = 0;
139 else
140 timeout = htonl(timeout_l / HZ);
141 137
142 NLA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); 138 NLA_PUT_BE32(skb, CTA_TIMEOUT, htonl(timeout));
143 return 0; 139 return 0;
144 140
145nla_put_failure: 141nla_put_failure:
@@ -193,7 +189,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
193 nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); 189 nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED);
194 if (!nest_helper) 190 if (!nest_helper)
195 goto nla_put_failure; 191 goto nla_put_failure;
196 NLA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); 192 NLA_PUT_STRING(skb, CTA_HELP_NAME, helper->name);
197 193
198 if (helper->to_nlattr) 194 if (helper->to_nlattr)
199 helper->to_nlattr(skb, ct); 195 helper->to_nlattr(skb, ct);
@@ -209,23 +205,21 @@ nla_put_failure:
209} 205}
210 206
211#ifdef CONFIG_NF_CT_ACCT 207#ifdef CONFIG_NF_CT_ACCT
212static inline int 208static int
213ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, 209ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
214 enum ip_conntrack_dir dir) 210 enum ip_conntrack_dir dir)
215{ 211{
216 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; 212 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
217 struct nlattr *nest_count; 213 struct nlattr *nest_count;
218 __be32 tmp;
219 214
220 nest_count = nla_nest_start(skb, type | NLA_F_NESTED); 215 nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
221 if (!nest_count) 216 if (!nest_count)
222 goto nla_put_failure; 217 goto nla_put_failure;
223 218
224 tmp = htonl(ct->counters[dir].packets); 219 NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS,
225 NLA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); 220 htonl(ct->counters[dir].packets));
226 221 NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES,
227 tmp = htonl(ct->counters[dir].bytes); 222 htonl(ct->counters[dir].bytes));
228 NLA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
229 223
230 nla_nest_end(skb, nest_count); 224 nla_nest_end(skb, nest_count);
231 225
@@ -242,9 +236,7 @@ nla_put_failure:
242static inline int 236static inline int
243ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 237ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
244{ 238{
245 __be32 mark = htonl(ct->mark); 239 NLA_PUT_BE32(skb, CTA_MARK, htonl(ct->mark));
246
247 NLA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
248 return 0; 240 return 0;
249 241
250nla_put_failure: 242nla_put_failure:
@@ -254,11 +246,95 @@ nla_put_failure:
254#define ctnetlink_dump_mark(a, b) (0) 246#define ctnetlink_dump_mark(a, b) (0)
255#endif 247#endif
256 248
249#ifdef CONFIG_NF_CONNTRACK_SECMARK
250static inline int
251ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct)
252{
253 NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark));
254 return 0;
255
256nla_put_failure:
257 return -1;
258}
259#else
260#define ctnetlink_dump_secmark(a, b) (0)
261#endif
262
263#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
264
265static inline int
266ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
267{
268 struct nlattr *nest_parms;
269
270 if (!(ct->status & IPS_EXPECTED))
271 return 0;
272
273 nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED);
274 if (!nest_parms)
275 goto nla_put_failure;
276 if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
277 goto nla_put_failure;
278 nla_nest_end(skb, nest_parms);
279
280 return 0;
281
282nla_put_failure:
283 return -1;
284}
285
286#ifdef CONFIG_NF_NAT_NEEDED
287static int
288dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
289{
290 struct nlattr *nest_parms;
291
292 nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
293 if (!nest_parms)
294 goto nla_put_failure;
295
296 NLA_PUT_BE32(skb, CTA_NAT_SEQ_CORRECTION_POS,
297 htonl(natseq->correction_pos));
298 NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
299 htonl(natseq->offset_before));
300 NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
301 htonl(natseq->offset_after));
302
303 nla_nest_end(skb, nest_parms);
304
305 return 0;
306
307nla_put_failure:
308 return -1;
309}
310
311static inline int
312ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
313{
314 struct nf_nat_seq *natseq;
315 struct nf_conn_nat *nat = nfct_nat(ct);
316
317 if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
318 return 0;
319
320 natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
321 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
322 return -1;
323
324 natseq = &nat->seq[IP_CT_DIR_REPLY];
325 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
326 return -1;
327
328 return 0;
329}
330#else
331#define ctnetlink_dump_nat_seq_adj(a, b) (0)
332#endif
333
257static inline int 334static inline int
258ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) 335ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
259{ 336{
260 __be32 id = htonl((unsigned long)ct); 337 NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
261 NLA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
262 return 0; 338 return 0;
263 339
264nla_put_failure: 340nla_put_failure:
@@ -268,9 +344,7 @@ nla_put_failure:
268static inline int 344static inline int
269ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) 345ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
270{ 346{
271 __be32 use = htonl(atomic_read(&ct->ct_general.use)); 347 NLA_PUT_BE32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)));
272
273 NLA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
274 return 0; 348 return 0;
275 349
276nla_put_failure: 350nla_put_failure:
@@ -320,8 +394,11 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
320 ctnetlink_dump_protoinfo(skb, ct) < 0 || 394 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
321 ctnetlink_dump_helpinfo(skb, ct) < 0 || 395 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
322 ctnetlink_dump_mark(skb, ct) < 0 || 396 ctnetlink_dump_mark(skb, ct) < 0 ||
397 ctnetlink_dump_secmark(skb, ct) < 0 ||
323 ctnetlink_dump_id(skb, ct) < 0 || 398 ctnetlink_dump_id(skb, ct) < 0 ||
324 ctnetlink_dump_use(skb, ct) < 0) 399 ctnetlink_dump_use(skb, ct) < 0 ||
400 ctnetlink_dump_master(skb, ct) < 0 ||
401 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
325 goto nla_put_failure; 402 goto nla_put_failure;
326 403
327 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 404 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -414,9 +491,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
414 && ctnetlink_dump_helpinfo(skb, ct) < 0) 491 && ctnetlink_dump_helpinfo(skb, ct) < 0)
415 goto nla_put_failure; 492 goto nla_put_failure;
416 493
417#ifdef CONFIG_NF_CONNTRACK_MARK 494#ifdef CONFIG_NF_CONNTRACK_SECMARK
418 if ((events & IPCT_MARK || ct->mark) 495 if ((events & IPCT_SECMARK || ct->secmark)
419 && ctnetlink_dump_mark(skb, ct) < 0) 496 && ctnetlink_dump_secmark(skb, ct) < 0)
420 goto nla_put_failure; 497 goto nla_put_failure;
421#endif 498#endif
422 499
@@ -424,8 +501,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
424 (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 501 (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
425 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) 502 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
426 goto nla_put_failure; 503 goto nla_put_failure;
504
505 if (events & IPCT_RELATED &&
506 ctnetlink_dump_master(skb, ct) < 0)
507 goto nla_put_failure;
508
509 if (events & IPCT_NATSEQADJ &&
510 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
511 goto nla_put_failure;
427 } 512 }
428 513
514#ifdef CONFIG_NF_CONNTRACK_MARK
515 if ((events & IPCT_MARK || ct->mark)
516 && ctnetlink_dump_mark(skb, ct) < 0)
517 goto nla_put_failure;
518#endif
519
429 nlh->nlmsg_len = skb->tail - b; 520 nlh->nlmsg_len = skb->tail - b;
430 nfnetlink_send(skb, 0, group, 0); 521 nfnetlink_send(skb, 0, group, 0);
431 return NOTIFY_DONE; 522 return NOTIFY_DONE;
@@ -444,7 +535,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
444 return 0; 535 return 0;
445} 536}
446 537
447#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num 538#define L3PROTO(ct) (ct)->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
448 539
449static int 540static int
450ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) 541ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
@@ -455,12 +546,12 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
455 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); 546 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
456 u_int8_t l3proto = nfmsg->nfgen_family; 547 u_int8_t l3proto = nfmsg->nfgen_family;
457 548
458 read_lock_bh(&nf_conntrack_lock); 549 rcu_read_lock();
459 last = (struct nf_conn *)cb->args[1]; 550 last = (struct nf_conn *)cb->args[1];
460 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { 551 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
461restart: 552restart:
462 hlist_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], 553 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]],
463 hnode) { 554 hnode) {
464 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 555 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
465 continue; 556 continue;
466 ct = nf_ct_tuplehash_to_ctrack(h); 557 ct = nf_ct_tuplehash_to_ctrack(h);
@@ -478,7 +569,8 @@ restart:
478 cb->nlh->nlmsg_seq, 569 cb->nlh->nlmsg_seq,
479 IPCTNL_MSG_CT_NEW, 570 IPCTNL_MSG_CT_NEW,
480 1, ct) < 0) { 571 1, ct) < 0) {
481 nf_conntrack_get(&ct->ct_general); 572 if (!atomic_inc_not_zero(&ct->ct_general.use))
573 continue;
482 cb->args[1] = (unsigned long)ct; 574 cb->args[1] = (unsigned long)ct;
483 goto out; 575 goto out;
484 } 576 }
@@ -494,7 +586,7 @@ restart:
494 } 586 }
495 } 587 }
496out: 588out:
497 read_unlock_bh(&nf_conntrack_lock); 589 rcu_read_unlock();
498 if (last) 590 if (last)
499 nf_ct_put(last); 591 nf_ct_put(last);
500 592
@@ -542,7 +634,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
542 634
543 if (!tb[CTA_PROTO_NUM]) 635 if (!tb[CTA_PROTO_NUM])
544 return -EINVAL; 636 return -EINVAL;
545 tuple->dst.protonum = *(u_int8_t *)nla_data(tb[CTA_PROTO_NUM]); 637 tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
546 638
547 l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); 639 l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
548 640
@@ -558,7 +650,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
558 return ret; 650 return ret;
559} 651}
560 652
561static inline int 653static int
562ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, 654ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
563 enum ctattr_tuple type, u_int8_t l3num) 655 enum ctattr_tuple type, u_int8_t l3num)
564{ 656{
@@ -605,7 +697,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
605 struct nf_nat_range *range) 697 struct nf_nat_range *range)
606{ 698{
607 struct nlattr *tb[CTA_PROTONAT_MAX+1]; 699 struct nlattr *tb[CTA_PROTONAT_MAX+1];
608 struct nf_nat_protocol *npt; 700 const struct nf_nat_protocol *npt;
609 int err; 701 int err;
610 702
611 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); 703 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
@@ -647,12 +739,12 @@ nfnetlink_parse_nat(struct nlattr *nat,
647 return err; 739 return err;
648 740
649 if (tb[CTA_NAT_MINIP]) 741 if (tb[CTA_NAT_MINIP])
650 range->min_ip = *(__be32 *)nla_data(tb[CTA_NAT_MINIP]); 742 range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
651 743
652 if (!tb[CTA_NAT_MAXIP]) 744 if (!tb[CTA_NAT_MAXIP])
653 range->max_ip = range->min_ip; 745 range->max_ip = range->min_ip;
654 else 746 else
655 range->max_ip = *(__be32 *)nla_data(tb[CTA_NAT_MAXIP]); 747 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
656 748
657 if (range->min_ip) 749 if (range->min_ip)
658 range->flags |= IP_NAT_RANGE_MAP_IPS; 750 range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -722,7 +814,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
722 ct = nf_ct_tuplehash_to_ctrack(h); 814 ct = nf_ct_tuplehash_to_ctrack(h);
723 815
724 if (cda[CTA_ID]) { 816 if (cda[CTA_ID]) {
725 u_int32_t id = ntohl(*(__be32 *)nla_data(cda[CTA_ID])); 817 u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
726 if (id != (u32)(unsigned long)ct) { 818 if (id != (u32)(unsigned long)ct) {
727 nf_ct_put(ct); 819 nf_ct_put(ct);
728 return -ENOENT; 820 return -ENOENT;
@@ -798,11 +890,11 @@ out:
798 return err; 890 return err;
799} 891}
800 892
801static inline int 893static int
802ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) 894ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
803{ 895{
804 unsigned long d; 896 unsigned long d;
805 unsigned int status = ntohl(*(__be32 *)nla_data(cda[CTA_STATUS])); 897 unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
806 d = ct->status ^ status; 898 d = ct->status ^ status;
807 899
808 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) 900 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -828,19 +920,17 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
828 if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, 920 if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
829 &range) < 0) 921 &range) < 0)
830 return -EINVAL; 922 return -EINVAL;
831 if (nf_nat_initialized(ct, 923 if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
832 HOOK2MANIP(NF_IP_PRE_ROUTING)))
833 return -EEXIST; 924 return -EEXIST;
834 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); 925 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
835 } 926 }
836 if (cda[CTA_NAT_SRC]) { 927 if (cda[CTA_NAT_SRC]) {
837 if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, 928 if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
838 &range) < 0) 929 &range) < 0)
839 return -EINVAL; 930 return -EINVAL;
840 if (nf_nat_initialized(ct, 931 if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
841 HOOK2MANIP(NF_IP_POST_ROUTING)))
842 return -EEXIST; 932 return -EEXIST;
843 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); 933 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
844 } 934 }
845#endif 935#endif
846 } 936 }
@@ -904,7 +994,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
904static inline int 994static inline int
905ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) 995ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[])
906{ 996{
907 u_int32_t timeout = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); 997 u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
908 998
909 if (!del_timer(&ct->timeout)) 999 if (!del_timer(&ct->timeout))
910 return -ETIME; 1000 return -ETIME;
@@ -935,6 +1025,66 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[])
935 return err; 1025 return err;
936} 1026}
937 1027
1028#ifdef CONFIG_NF_NAT_NEEDED
1029static inline int
1030change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr)
1031{
1032 struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
1033
1034 nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL);
1035
1036 if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
1037 return -EINVAL;
1038
1039 natseq->correction_pos =
1040 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
1041
1042 if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
1043 return -EINVAL;
1044
1045 natseq->offset_before =
1046 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
1047
1048 if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
1049 return -EINVAL;
1050
1051 natseq->offset_after =
1052 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
1053
1054 return 0;
1055}
1056
1057static int
1058ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[])
1059{
1060 int ret = 0;
1061 struct nf_conn_nat *nat = nfct_nat(ct);
1062
1063 if (!nat)
1064 return 0;
1065
1066 if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
1067 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
1068 cda[CTA_NAT_SEQ_ADJ_ORIG]);
1069 if (ret < 0)
1070 return ret;
1071
1072 ct->status |= IPS_SEQ_ADJUST;
1073 }
1074
1075 if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
1076 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
1077 cda[CTA_NAT_SEQ_ADJ_REPLY]);
1078 if (ret < 0)
1079 return ret;
1080
1081 ct->status |= IPS_SEQ_ADJUST;
1082 }
1083
1084 return 0;
1085}
1086#endif
1087
938static int 1088static int
939ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) 1089ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
940{ 1090{
@@ -966,7 +1116,15 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
966 1116
967#if defined(CONFIG_NF_CONNTRACK_MARK) 1117#if defined(CONFIG_NF_CONNTRACK_MARK)
968 if (cda[CTA_MARK]) 1118 if (cda[CTA_MARK])
969 ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); 1119 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
1120#endif
1121
1122#ifdef CONFIG_NF_NAT_NEEDED
1123 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
1124 err = ctnetlink_change_nat_seq_adj(ct, cda);
1125 if (err < 0)
1126 return err;
1127 }
970#endif 1128#endif
971 1129
972 return 0; 1130 return 0;
@@ -989,7 +1147,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
989 1147
990 if (!cda[CTA_TIMEOUT]) 1148 if (!cda[CTA_TIMEOUT])
991 goto err; 1149 goto err;
992 ct->timeout.expires = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); 1150 ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
993 1151
994 ct->timeout.expires = jiffies + ct->timeout.expires * HZ; 1152 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
995 ct->status |= IPS_CONFIRMED; 1153 ct->status |= IPS_CONFIRMED;
@@ -1008,14 +1166,15 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1008 1166
1009#if defined(CONFIG_NF_CONNTRACK_MARK) 1167#if defined(CONFIG_NF_CONNTRACK_MARK)
1010 if (cda[CTA_MARK]) 1168 if (cda[CTA_MARK])
1011 ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); 1169 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
1012#endif 1170#endif
1013 1171
1014 helper = nf_ct_helper_find_get(rtuple); 1172 rcu_read_lock();
1173 helper = __nf_ct_helper_find(rtuple);
1015 if (helper) { 1174 if (helper) {
1016 help = nf_ct_helper_ext_add(ct, GFP_KERNEL); 1175 help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
1017 if (help == NULL) { 1176 if (help == NULL) {
1018 nf_ct_helper_put(helper); 1177 rcu_read_unlock();
1019 err = -ENOMEM; 1178 err = -ENOMEM;
1020 goto err; 1179 goto err;
1021 } 1180 }
@@ -1024,14 +1183,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1024 } 1183 }
1025 1184
1026 /* setup master conntrack: this is a confirmed expectation */ 1185 /* setup master conntrack: this is a confirmed expectation */
1027 if (master_ct) 1186 if (master_ct) {
1187 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1028 ct->master = master_ct; 1188 ct->master = master_ct;
1189 }
1029 1190
1030 add_timer(&ct->timeout); 1191 add_timer(&ct->timeout);
1031 nf_conntrack_hash_insert(ct); 1192 nf_conntrack_hash_insert(ct);
1032 1193 rcu_read_unlock();
1033 if (helper)
1034 nf_ct_helper_put(helper);
1035 1194
1036 return 0; 1195 return 0;
1037 1196
@@ -1062,11 +1221,11 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1062 return err; 1221 return err;
1063 } 1222 }
1064 1223
1065 write_lock_bh(&nf_conntrack_lock); 1224 spin_lock_bh(&nf_conntrack_lock);
1066 if (cda[CTA_TUPLE_ORIG]) 1225 if (cda[CTA_TUPLE_ORIG])
1067 h = __nf_conntrack_find(&otuple, NULL); 1226 h = __nf_conntrack_find(&otuple);
1068 else if (cda[CTA_TUPLE_REPLY]) 1227 else if (cda[CTA_TUPLE_REPLY])
1069 h = __nf_conntrack_find(&rtuple, NULL); 1228 h = __nf_conntrack_find(&rtuple);
1070 1229
1071 if (h == NULL) { 1230 if (h == NULL) {
1072 struct nf_conntrack_tuple master; 1231 struct nf_conntrack_tuple master;
@@ -1079,9 +1238,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1079 CTA_TUPLE_MASTER, 1238 CTA_TUPLE_MASTER,
1080 u3); 1239 u3);
1081 if (err < 0) 1240 if (err < 0)
1082 return err; 1241 goto out_unlock;
1083 1242
1084 master_h = __nf_conntrack_find(&master, NULL); 1243 master_h = __nf_conntrack_find(&master);
1085 if (master_h == NULL) { 1244 if (master_h == NULL) {
1086 err = -ENOENT; 1245 err = -ENOENT;
1087 goto out_unlock; 1246 goto out_unlock;
@@ -1090,7 +1249,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1090 atomic_inc(&master_ct->ct_general.use); 1249 atomic_inc(&master_ct->ct_general.use);
1091 } 1250 }
1092 1251
1093 write_unlock_bh(&nf_conntrack_lock); 1252 spin_unlock_bh(&nf_conntrack_lock);
1094 err = -ENOENT; 1253 err = -ENOENT;
1095 if (nlh->nlmsg_flags & NLM_F_CREATE) 1254 if (nlh->nlmsg_flags & NLM_F_CREATE)
1096 err = ctnetlink_create_conntrack(cda, 1255 err = ctnetlink_create_conntrack(cda,
@@ -1123,7 +1282,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1123 } 1282 }
1124 1283
1125out_unlock: 1284out_unlock:
1126 write_unlock_bh(&nf_conntrack_lock); 1285 spin_unlock_bh(&nf_conntrack_lock);
1127 return err; 1286 return err;
1128} 1287}
1129 1288
@@ -1191,13 +1350,15 @@ nla_put_failure:
1191 return -1; 1350 return -1;
1192} 1351}
1193 1352
1194static inline int 1353static int
1195ctnetlink_exp_dump_expect(struct sk_buff *skb, 1354ctnetlink_exp_dump_expect(struct sk_buff *skb,
1196 const struct nf_conntrack_expect *exp) 1355 const struct nf_conntrack_expect *exp)
1197{ 1356{
1198 struct nf_conn *master = exp->master; 1357 struct nf_conn *master = exp->master;
1199 __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); 1358 long timeout = (exp->timeout.expires - jiffies) / HZ;
1200 __be32 id = htonl((unsigned long)exp); 1359
1360 if (timeout < 0)
1361 timeout = 0;
1201 1362
1202 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) 1363 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
1203 goto nla_put_failure; 1364 goto nla_put_failure;
@@ -1208,8 +1369,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1208 CTA_EXPECT_MASTER) < 0) 1369 CTA_EXPECT_MASTER) < 0)
1209 goto nla_put_failure; 1370 goto nla_put_failure;
1210 1371
1211 NLA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); 1372 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
1212 NLA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); 1373 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
1213 1374
1214 return 0; 1375 return 0;
1215 1376
@@ -1312,7 +1473,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
1312 struct hlist_node *n; 1473 struct hlist_node *n;
1313 u_int8_t l3proto = nfmsg->nfgen_family; 1474 u_int8_t l3proto = nfmsg->nfgen_family;
1314 1475
1315 read_lock_bh(&nf_conntrack_lock); 1476 rcu_read_lock();
1316 last = (struct nf_conntrack_expect *)cb->args[1]; 1477 last = (struct nf_conntrack_expect *)cb->args[1];
1317 for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { 1478 for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
1318restart: 1479restart:
@@ -1329,7 +1490,8 @@ restart:
1329 cb->nlh->nlmsg_seq, 1490 cb->nlh->nlmsg_seq,
1330 IPCTNL_MSG_EXP_NEW, 1491 IPCTNL_MSG_EXP_NEW,
1331 1, exp) < 0) { 1492 1, exp) < 0) {
1332 atomic_inc(&exp->use); 1493 if (!atomic_inc_not_zero(&exp->use))
1494 continue;
1333 cb->args[1] = (unsigned long)exp; 1495 cb->args[1] = (unsigned long)exp;
1334 goto out; 1496 goto out;
1335 } 1497 }
@@ -1340,7 +1502,7 @@ restart:
1340 } 1502 }
1341 } 1503 }
1342out: 1504out:
1343 read_unlock_bh(&nf_conntrack_lock); 1505 rcu_read_unlock();
1344 if (last) 1506 if (last)
1345 nf_ct_expect_put(last); 1507 nf_ct_expect_put(last);
1346 1508
@@ -1382,7 +1544,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1382 return -ENOENT; 1544 return -ENOENT;
1383 1545
1384 if (cda[CTA_EXPECT_ID]) { 1546 if (cda[CTA_EXPECT_ID]) {
1385 __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); 1547 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
1386 if (ntohl(id) != (u32)(unsigned long)exp) { 1548 if (ntohl(id) != (u32)(unsigned long)exp) {
1387 nf_ct_expect_put(exp); 1549 nf_ct_expect_put(exp);
1388 return -ENOENT; 1550 return -ENOENT;
@@ -1436,7 +1598,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1436 return -ENOENT; 1598 return -ENOENT;
1437 1599
1438 if (cda[CTA_EXPECT_ID]) { 1600 if (cda[CTA_EXPECT_ID]) {
1439 __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); 1601 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
1440 if (ntohl(id) != (u32)(unsigned long)exp) { 1602 if (ntohl(id) != (u32)(unsigned long)exp) {
1441 nf_ct_expect_put(exp); 1603 nf_ct_expect_put(exp);
1442 return -ENOENT; 1604 return -ENOENT;
@@ -1453,10 +1615,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1453 struct nf_conn_help *m_help; 1615 struct nf_conn_help *m_help;
1454 1616
1455 /* delete all expectations for this helper */ 1617 /* delete all expectations for this helper */
1456 write_lock_bh(&nf_conntrack_lock); 1618 spin_lock_bh(&nf_conntrack_lock);
1457 h = __nf_conntrack_helper_find_byname(name); 1619 h = __nf_conntrack_helper_find_byname(name);
1458 if (!h) { 1620 if (!h) {
1459 write_unlock_bh(&nf_conntrack_lock); 1621 spin_unlock_bh(&nf_conntrack_lock);
1460 return -EINVAL; 1622 return -EINVAL;
1461 } 1623 }
1462 for (i = 0; i < nf_ct_expect_hsize; i++) { 1624 for (i = 0; i < nf_ct_expect_hsize; i++) {
@@ -1471,10 +1633,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1471 } 1633 }
1472 } 1634 }
1473 } 1635 }
1474 write_unlock_bh(&nf_conntrack_lock); 1636 spin_unlock_bh(&nf_conntrack_lock);
1475 } else { 1637 } else {
1476 /* This basically means we have to flush everything*/ 1638 /* This basically means we have to flush everything*/
1477 write_lock_bh(&nf_conntrack_lock); 1639 spin_lock_bh(&nf_conntrack_lock);
1478 for (i = 0; i < nf_ct_expect_hsize; i++) { 1640 for (i = 0; i < nf_ct_expect_hsize; i++) {
1479 hlist_for_each_entry_safe(exp, n, next, 1641 hlist_for_each_entry_safe(exp, n, next,
1480 &nf_ct_expect_hash[i], 1642 &nf_ct_expect_hash[i],
@@ -1485,7 +1647,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1485 } 1647 }
1486 } 1648 }
1487 } 1649 }
1488 write_unlock_bh(&nf_conntrack_lock); 1650 spin_unlock_bh(&nf_conntrack_lock);
1489 } 1651 }
1490 1652
1491 return 0; 1653 return 0;
@@ -1571,11 +1733,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1571 if (err < 0) 1733 if (err < 0)
1572 return err; 1734 return err;
1573 1735
1574 write_lock_bh(&nf_conntrack_lock); 1736 spin_lock_bh(&nf_conntrack_lock);
1575 exp = __nf_ct_expect_find(&tuple); 1737 exp = __nf_ct_expect_find(&tuple);
1576 1738
1577 if (!exp) { 1739 if (!exp) {
1578 write_unlock_bh(&nf_conntrack_lock); 1740 spin_unlock_bh(&nf_conntrack_lock);
1579 err = -ENOENT; 1741 err = -ENOENT;
1580 if (nlh->nlmsg_flags & NLM_F_CREATE) 1742 if (nlh->nlmsg_flags & NLM_F_CREATE)
1581 err = ctnetlink_create_expect(cda, u3); 1743 err = ctnetlink_create_expect(cda, u3);
@@ -1585,7 +1747,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1585 err = -EEXIST; 1747 err = -EEXIST;
1586 if (!(nlh->nlmsg_flags & NLM_F_EXCL)) 1748 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1587 err = ctnetlink_change_expect(exp, cda); 1749 err = ctnetlink_change_expect(exp, cda);
1588 write_unlock_bh(&nf_conntrack_lock); 1750 spin_unlock_bh(&nf_conntrack_lock);
1589 1751
1590 return err; 1752 return err;
1591} 1753}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 099b6df3e2b5..b5cb8e831230 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
67 67
68#ifdef DEBUG 68#ifdef DEBUG
69/* PptpControlMessageType names */ 69/* PptpControlMessageType names */
70const char *pptp_msg_name[] = { 70const char *const pptp_msg_name[] = {
71 "UNKNOWN_MESSAGE", 71 "UNKNOWN_MESSAGE",
72 "START_SESSION_REQUEST", 72 "START_SESSION_REQUEST",
73 "START_SESSION_REPLY", 73 "START_SESSION_REPLY",
@@ -136,7 +136,7 @@ static void pptp_expectfn(struct nf_conn *ct,
136 136
137static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) 137static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
138{ 138{
139 struct nf_conntrack_tuple_hash *h; 139 const struct nf_conntrack_tuple_hash *h;
140 struct nf_conntrack_expect *exp; 140 struct nf_conntrack_expect *exp;
141 struct nf_conn *sibling; 141 struct nf_conn *sibling;
142 142
@@ -168,7 +168,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
168/* timeout GRE data connections */ 168/* timeout GRE data connections */
169static void pptp_destroy_siblings(struct nf_conn *ct) 169static void pptp_destroy_siblings(struct nf_conn *ct)
170{ 170{
171 struct nf_conn_help *help = nfct_help(ct); 171 const struct nf_conn_help *help = nfct_help(ct);
172 struct nf_conntrack_tuple t; 172 struct nf_conntrack_tuple t;
173 173
174 nf_ct_gre_keymap_destroy(ct); 174 nf_ct_gre_keymap_destroy(ct);
@@ -497,9 +497,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
497 497
498{ 498{
499 int dir = CTINFO2DIR(ctinfo); 499 int dir = CTINFO2DIR(ctinfo);
500 struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; 500 const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
501 struct tcphdr _tcph, *tcph; 501 const struct tcphdr *tcph;
502 struct pptp_pkt_hdr _pptph, *pptph; 502 struct tcphdr _tcph;
503 const struct pptp_pkt_hdr *pptph;
504 struct pptp_pkt_hdr _pptph;
503 struct PptpControlHeader _ctlh, *ctlh; 505 struct PptpControlHeader _ctlh, *ctlh;
504 union pptp_ctrl_union _pptpReq, *pptpReq; 506 union pptp_ctrl_union _pptpReq, *pptpReq;
505 unsigned int tcplen = skb->len - protoff; 507 unsigned int tcplen = skb->len - protoff;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 6d947068c58f..8595b5946acf 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -36,11 +36,11 @@ static DEFINE_MUTEX(nf_ct_proto_mutex);
36 36
37#ifdef CONFIG_SYSCTL 37#ifdef CONFIG_SYSCTL
38static int 38static int
39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path, 39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_path *path,
40 struct ctl_table *table, unsigned int *users) 40 struct ctl_table *table, unsigned int *users)
41{ 41{
42 if (*header == NULL) { 42 if (*header == NULL) {
43 *header = nf_register_sysctl_table(path, table); 43 *header = register_sysctl_paths(path, table);
44 if (*header == NULL) 44 if (*header == NULL)
45 return -ENOMEM; 45 return -ENOMEM;
46 } 46 }
@@ -55,7 +55,8 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
55{ 55{
56 if (users != NULL && --*users > 0) 56 if (users != NULL && --*users > 0)
57 return; 57 return;
58 nf_unregister_sysctl_table(*header, table); 58
59 unregister_sysctl_table(*header);
59 *header = NULL; 60 *header = NULL;
60} 61}
61#endif 62#endif
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 13f819179642..55458915575f 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -40,27 +40,20 @@ static int generic_print_tuple(struct seq_file *s,
40 return 0; 40 return 0;
41} 41}
42 42
43/* Print out the private part of the conntrack. */
44static int generic_print_conntrack(struct seq_file *s,
45 const struct nf_conn *state)
46{
47 return 0;
48}
49
50/* Returns verdict for packet, or -1 for invalid. */ 43/* Returns verdict for packet, or -1 for invalid. */
51static int packet(struct nf_conn *conntrack, 44static int packet(struct nf_conn *ct,
52 const struct sk_buff *skb, 45 const struct sk_buff *skb,
53 unsigned int dataoff, 46 unsigned int dataoff,
54 enum ip_conntrack_info ctinfo, 47 enum ip_conntrack_info ctinfo,
55 int pf, 48 int pf,
56 unsigned int hooknum) 49 unsigned int hooknum)
57{ 50{
58 nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); 51 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout);
59 return NF_ACCEPT; 52 return NF_ACCEPT;
60} 53}
61 54
62/* Called when a new connection for this protocol found. */ 55/* Called when a new connection for this protocol found. */
63static int new(struct nf_conn *conntrack, const struct sk_buff *skb, 56static int new(struct nf_conn *ct, const struct sk_buff *skb,
64 unsigned int dataoff) 57 unsigned int dataoff)
65{ 58{
66 return 1; 59 return 1;
@@ -104,7 +97,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
104 .pkt_to_tuple = generic_pkt_to_tuple, 97 .pkt_to_tuple = generic_pkt_to_tuple,
105 .invert_tuple = generic_invert_tuple, 98 .invert_tuple = generic_invert_tuple,
106 .print_tuple = generic_print_tuple, 99 .print_tuple = generic_print_tuple,
107 .print_conntrack = generic_print_conntrack,
108 .packet = packet, 100 .packet = packet,
109 .new = new, 101 .new = new,
110#ifdef CONFIG_SYSCTL 102#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 4a185f6aa65a..e10024a1b666 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -161,9 +161,11 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
161 unsigned int dataoff, 161 unsigned int dataoff,
162 struct nf_conntrack_tuple *tuple) 162 struct nf_conntrack_tuple *tuple)
163{ 163{
164 struct gre_hdr_pptp _pgrehdr, *pgrehdr; 164 const struct gre_hdr_pptp *pgrehdr;
165 struct gre_hdr_pptp _pgrehdr;
165 __be16 srckey; 166 __be16 srckey;
166 struct gre_hdr _grehdr, *grehdr; 167 const struct gre_hdr *grehdr;
168 struct gre_hdr _grehdr;
167 169
168 /* first only delinearize old RFC1701 GRE header */ 170 /* first only delinearize old RFC1701 GRE header */
169 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); 171 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index cb0467510592..f9a08370dbb3 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,7 +25,7 @@
25#include <net/netfilter/nf_conntrack_l4proto.h> 25#include <net/netfilter/nf_conntrack_l4proto.h>
26#include <net/netfilter/nf_conntrack_ecache.h> 26#include <net/netfilter/nf_conntrack_ecache.h>
27 27
28/* Protects conntrack->proto.sctp */ 28/* Protects ct->proto.sctp */
29static DEFINE_RWLOCK(sctp_lock); 29static DEFINE_RWLOCK(sctp_lock);
30 30
31/* FIXME: Examine ipfilter's timeouts and conntrack transitions more 31/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
@@ -49,24 +49,15 @@ static const char *sctp_conntrack_names[] = {
49#define HOURS * 60 MINS 49#define HOURS * 60 MINS
50#define DAYS * 24 HOURS 50#define DAYS * 24 HOURS
51 51
52static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; 52static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
53static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; 53 [SCTP_CONNTRACK_CLOSED] = 10 SECS,
54static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; 54 [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
55static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; 55 [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
56static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; 56 [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
57static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; 57 [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
58static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; 58 [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
59 59 [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
60static unsigned int * sctp_timeouts[] 60};
61= { NULL, /* SCTP_CONNTRACK_NONE */
62 &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
63 &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
64 &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
65 &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
66 &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
67 &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
68 &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
69 };
70 61
71#define sNO SCTP_CONNTRACK_NONE 62#define sNO SCTP_CONNTRACK_NONE
72#define sCL SCTP_CONNTRACK_CLOSED 63#define sCL SCTP_CONNTRACK_CLOSED
@@ -110,7 +101,7 @@ cookie echoed to closed.
110*/ 101*/
111 102
112/* SCTP conntrack state transitions */ 103/* SCTP conntrack state transitions */
113static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { 104static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
114 { 105 {
115/* ORIGINAL */ 106/* ORIGINAL */
116/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ 107/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
@@ -173,29 +164,28 @@ static int sctp_print_tuple(struct seq_file *s,
173} 164}
174 165
175/* Print out the private part of the conntrack. */ 166/* Print out the private part of the conntrack. */
176static int sctp_print_conntrack(struct seq_file *s, 167static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
177 const struct nf_conn *conntrack)
178{ 168{
179 enum sctp_conntrack state; 169 enum sctp_conntrack state;
180 170
181 read_lock_bh(&sctp_lock); 171 read_lock_bh(&sctp_lock);
182 state = conntrack->proto.sctp.state; 172 state = ct->proto.sctp.state;
183 read_unlock_bh(&sctp_lock); 173 read_unlock_bh(&sctp_lock);
184 174
185 return seq_printf(s, "%s ", sctp_conntrack_names[state]); 175 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
186} 176}
187 177
188#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ 178#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
189for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \ 179for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \
190 offset < skb->len && \ 180 (offset) < (skb)->len && \
191 (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ 181 ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
192 offset += (ntohs(sch->length) + 3) & ~3, count++) 182 (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
193 183
194/* Some validity checks to make sure the chunks are fine */ 184/* Some validity checks to make sure the chunks are fine */
195static int do_basic_checks(struct nf_conn *conntrack, 185static int do_basic_checks(struct nf_conn *ct,
196 const struct sk_buff *skb, 186 const struct sk_buff *skb,
197 unsigned int dataoff, 187 unsigned int dataoff,
198 char *map) 188 unsigned long *map)
199{ 189{
200 u_int32_t offset, count; 190 u_int32_t offset, count;
201 sctp_chunkhdr_t _sch, *sch; 191 sctp_chunkhdr_t _sch, *sch;
@@ -206,76 +196,83 @@ static int do_basic_checks(struct nf_conn *conntrack,
206 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 196 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
207 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); 197 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
208 198
209 if (sch->type == SCTP_CID_INIT 199 if (sch->type == SCTP_CID_INIT ||
210 || sch->type == SCTP_CID_INIT_ACK 200 sch->type == SCTP_CID_INIT_ACK ||
211 || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { 201 sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
212 flag = 1; 202 flag = 1;
213 }
214 203
215 /* 204 /*
216 * Cookie Ack/Echo chunks not the first OR 205 * Cookie Ack/Echo chunks not the first OR
217 * Init / Init Ack / Shutdown compl chunks not the only chunks 206 * Init / Init Ack / Shutdown compl chunks not the only chunks
218 * OR zero-length. 207 * OR zero-length.
219 */ 208 */
220 if (((sch->type == SCTP_CID_COOKIE_ACK 209 if (((sch->type == SCTP_CID_COOKIE_ACK ||
221 || sch->type == SCTP_CID_COOKIE_ECHO 210 sch->type == SCTP_CID_COOKIE_ECHO ||
222 || flag) 211 flag) &&
223 && count !=0) || !sch->length) { 212 count != 0) || !sch->length) {
224 pr_debug("Basic checks failed\n"); 213 pr_debug("Basic checks failed\n");
225 return 1; 214 return 1;
226 } 215 }
227 216
228 if (map) { 217 if (map)
229 set_bit(sch->type, (void *)map); 218 set_bit(sch->type, map);
230 }
231 } 219 }
232 220
233 pr_debug("Basic checks passed\n"); 221 pr_debug("Basic checks passed\n");
234 return count == 0; 222 return count == 0;
235} 223}
236 224
237static int new_state(enum ip_conntrack_dir dir, 225static int sctp_new_state(enum ip_conntrack_dir dir,
238 enum sctp_conntrack cur_state, 226 enum sctp_conntrack cur_state,
239 int chunk_type) 227 int chunk_type)
240{ 228{
241 int i; 229 int i;
242 230
243 pr_debug("Chunk type: %d\n", chunk_type); 231 pr_debug("Chunk type: %d\n", chunk_type);
244 232
245 switch (chunk_type) { 233 switch (chunk_type) {
246 case SCTP_CID_INIT: 234 case SCTP_CID_INIT:
247 pr_debug("SCTP_CID_INIT\n"); 235 pr_debug("SCTP_CID_INIT\n");
248 i = 0; break; 236 i = 0;
249 case SCTP_CID_INIT_ACK: 237 break;
250 pr_debug("SCTP_CID_INIT_ACK\n"); 238 case SCTP_CID_INIT_ACK:
251 i = 1; break; 239 pr_debug("SCTP_CID_INIT_ACK\n");
252 case SCTP_CID_ABORT: 240 i = 1;
253 pr_debug("SCTP_CID_ABORT\n"); 241 break;
254 i = 2; break; 242 case SCTP_CID_ABORT:
255 case SCTP_CID_SHUTDOWN: 243 pr_debug("SCTP_CID_ABORT\n");
256 pr_debug("SCTP_CID_SHUTDOWN\n"); 244 i = 2;
257 i = 3; break; 245 break;
258 case SCTP_CID_SHUTDOWN_ACK: 246 case SCTP_CID_SHUTDOWN:
259 pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); 247 pr_debug("SCTP_CID_SHUTDOWN\n");
260 i = 4; break; 248 i = 3;
261 case SCTP_CID_ERROR: 249 break;
262 pr_debug("SCTP_CID_ERROR\n"); 250 case SCTP_CID_SHUTDOWN_ACK:
263 i = 5; break; 251 pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
264 case SCTP_CID_COOKIE_ECHO: 252 i = 4;
265 pr_debug("SCTP_CID_COOKIE_ECHO\n"); 253 break;
266 i = 6; break; 254 case SCTP_CID_ERROR:
267 case SCTP_CID_COOKIE_ACK: 255 pr_debug("SCTP_CID_ERROR\n");
268 pr_debug("SCTP_CID_COOKIE_ACK\n"); 256 i = 5;
269 i = 7; break; 257 break;
270 case SCTP_CID_SHUTDOWN_COMPLETE: 258 case SCTP_CID_COOKIE_ECHO:
271 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); 259 pr_debug("SCTP_CID_COOKIE_ECHO\n");
272 i = 8; break; 260 i = 6;
273 default: 261 break;
274 /* Other chunks like DATA, SACK, HEARTBEAT and 262 case SCTP_CID_COOKIE_ACK:
275 its ACK do not cause a change in state */ 263 pr_debug("SCTP_CID_COOKIE_ACK\n");
276 pr_debug("Unknown chunk type, Will stay in %s\n", 264 i = 7;
277 sctp_conntrack_names[cur_state]); 265 break;
278 return cur_state; 266 case SCTP_CID_SHUTDOWN_COMPLETE:
267 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
268 i = 8;
269 break;
270 default:
271 /* Other chunks like DATA, SACK, HEARTBEAT and
272 its ACK do not cause a change in state */
273 pr_debug("Unknown chunk type, Will stay in %s\n",
274 sctp_conntrack_names[cur_state]);
275 return cur_state;
279 } 276 }
280 277
281 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", 278 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
@@ -285,154 +282,145 @@ static int new_state(enum ip_conntrack_dir dir,
285 return sctp_conntracks[dir][i][cur_state]; 282 return sctp_conntracks[dir][i][cur_state];
286} 283}
287 284
288/* Returns verdict for packet, or -1 for invalid. */ 285/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
289static int sctp_packet(struct nf_conn *conntrack, 286static int sctp_packet(struct nf_conn *ct,
290 const struct sk_buff *skb, 287 const struct sk_buff *skb,
291 unsigned int dataoff, 288 unsigned int dataoff,
292 enum ip_conntrack_info ctinfo, 289 enum ip_conntrack_info ctinfo,
293 int pf, 290 int pf,
294 unsigned int hooknum) 291 unsigned int hooknum)
295{ 292{
296 enum sctp_conntrack newconntrack, oldsctpstate; 293 enum sctp_conntrack new_state, old_state;
294 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
297 sctp_sctphdr_t _sctph, *sh; 295 sctp_sctphdr_t _sctph, *sh;
298 sctp_chunkhdr_t _sch, *sch; 296 sctp_chunkhdr_t _sch, *sch;
299 u_int32_t offset, count; 297 u_int32_t offset, count;
300 char map[256 / sizeof (char)] = {0}; 298 unsigned long map[256 / sizeof(unsigned long)] = { 0 };
301 299
302 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); 300 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
303 if (sh == NULL) 301 if (sh == NULL)
304 return -1; 302 goto out;
305 303
306 if (do_basic_checks(conntrack, skb, dataoff, map) != 0) 304 if (do_basic_checks(ct, skb, dataoff, map) != 0)
307 return -1; 305 goto out;
308 306
309 /* Check the verification tag (Sec 8.5) */ 307 /* Check the verification tag (Sec 8.5) */
310 if (!test_bit(SCTP_CID_INIT, (void *)map) 308 if (!test_bit(SCTP_CID_INIT, map) &&
311 && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) 309 !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
312 && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) 310 !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
313 && !test_bit(SCTP_CID_ABORT, (void *)map) 311 !test_bit(SCTP_CID_ABORT, map) &&
314 && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) 312 !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
315 && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { 313 sh->vtag != ct->proto.sctp.vtag[dir]) {
316 pr_debug("Verification tag check failed\n"); 314 pr_debug("Verification tag check failed\n");
317 return -1; 315 goto out;
318 } 316 }
319 317
320 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; 318 old_state = new_state = SCTP_CONNTRACK_MAX;
319 write_lock_bh(&sctp_lock);
321 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 320 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
322 write_lock_bh(&sctp_lock);
323
324 /* Special cases of Verification tag check (Sec 8.5.1) */ 321 /* Special cases of Verification tag check (Sec 8.5.1) */
325 if (sch->type == SCTP_CID_INIT) { 322 if (sch->type == SCTP_CID_INIT) {
326 /* Sec 8.5.1 (A) */ 323 /* Sec 8.5.1 (A) */
327 if (sh->vtag != 0) { 324 if (sh->vtag != 0)
328 write_unlock_bh(&sctp_lock); 325 goto out_unlock;
329 return -1;
330 }
331 } else if (sch->type == SCTP_CID_ABORT) { 326 } else if (sch->type == SCTP_CID_ABORT) {
332 /* Sec 8.5.1 (B) */ 327 /* Sec 8.5.1 (B) */
333 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) 328 if (sh->vtag != ct->proto.sctp.vtag[dir] &&
334 && !(sh->vtag == conntrack->proto.sctp.vtag 329 sh->vtag != ct->proto.sctp.vtag[!dir])
335 [1 - CTINFO2DIR(ctinfo)])) { 330 goto out_unlock;
336 write_unlock_bh(&sctp_lock);
337 return -1;
338 }
339 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { 331 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
340 /* Sec 8.5.1 (C) */ 332 /* Sec 8.5.1 (C) */
341 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) 333 if (sh->vtag != ct->proto.sctp.vtag[dir] &&
342 && !(sh->vtag == conntrack->proto.sctp.vtag 334 sh->vtag != ct->proto.sctp.vtag[!dir] &&
343 [1 - CTINFO2DIR(ctinfo)] 335 sch->flags & SCTP_CHUNK_FLAG_T)
344 && (sch->flags & 1))) { 336 goto out_unlock;
345 write_unlock_bh(&sctp_lock);
346 return -1;
347 }
348 } else if (sch->type == SCTP_CID_COOKIE_ECHO) { 337 } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
349 /* Sec 8.5.1 (D) */ 338 /* Sec 8.5.1 (D) */
350 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { 339 if (sh->vtag != ct->proto.sctp.vtag[dir])
351 write_unlock_bh(&sctp_lock); 340 goto out_unlock;
352 return -1;
353 }
354 } 341 }
355 342
356 oldsctpstate = conntrack->proto.sctp.state; 343 old_state = ct->proto.sctp.state;
357 newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type); 344 new_state = sctp_new_state(dir, old_state, sch->type);
358 345
359 /* Invalid */ 346 /* Invalid */
360 if (newconntrack == SCTP_CONNTRACK_MAX) { 347 if (new_state == SCTP_CONNTRACK_MAX) {
361 pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u " 348 pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
362 "conntrack=%u\n", 349 "conntrack=%u\n",
363 CTINFO2DIR(ctinfo), sch->type, oldsctpstate); 350 dir, sch->type, old_state);
364 write_unlock_bh(&sctp_lock); 351 goto out_unlock;
365 return -1;
366 } 352 }
367 353
368 /* If it is an INIT or an INIT ACK note down the vtag */ 354 /* If it is an INIT or an INIT ACK note down the vtag */
369 if (sch->type == SCTP_CID_INIT 355 if (sch->type == SCTP_CID_INIT ||
370 || sch->type == SCTP_CID_INIT_ACK) { 356 sch->type == SCTP_CID_INIT_ACK) {
371 sctp_inithdr_t _inithdr, *ih; 357 sctp_inithdr_t _inithdr, *ih;
372 358
373 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), 359 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
374 sizeof(_inithdr), &_inithdr); 360 sizeof(_inithdr), &_inithdr);
375 if (ih == NULL) { 361 if (ih == NULL)
376 write_unlock_bh(&sctp_lock); 362 goto out_unlock;
377 return -1;
378 }
379 pr_debug("Setting vtag %x for dir %d\n", 363 pr_debug("Setting vtag %x for dir %d\n",
380 ih->init_tag, !CTINFO2DIR(ctinfo)); 364 ih->init_tag, !dir);
381 conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; 365 ct->proto.sctp.vtag[!dir] = ih->init_tag;
382 } 366 }
383 367
384 conntrack->proto.sctp.state = newconntrack; 368 ct->proto.sctp.state = new_state;
385 if (oldsctpstate != newconntrack) 369 if (old_state != new_state)
386 nf_conntrack_event_cache(IPCT_PROTOINFO, skb); 370 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
387 write_unlock_bh(&sctp_lock);
388 } 371 }
372 write_unlock_bh(&sctp_lock);
389 373
390 nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); 374 nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
391 375
392 if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED 376 if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
393 && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY 377 dir == IP_CT_DIR_REPLY &&
394 && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { 378 new_state == SCTP_CONNTRACK_ESTABLISHED) {
395 pr_debug("Setting assured bit\n"); 379 pr_debug("Setting assured bit\n");
396 set_bit(IPS_ASSURED_BIT, &conntrack->status); 380 set_bit(IPS_ASSURED_BIT, &ct->status);
397 nf_conntrack_event_cache(IPCT_STATUS, skb); 381 nf_conntrack_event_cache(IPCT_STATUS, skb);
398 } 382 }
399 383
400 return NF_ACCEPT; 384 return NF_ACCEPT;
385
386out_unlock:
387 write_unlock_bh(&sctp_lock);
388out:
389 return -NF_ACCEPT;
401} 390}
402 391
403/* Called when a new connection for this protocol found. */ 392/* Called when a new connection for this protocol found. */
404static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, 393static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
405 unsigned int dataoff) 394 unsigned int dataoff)
406{ 395{
407 enum sctp_conntrack newconntrack; 396 enum sctp_conntrack new_state;
408 sctp_sctphdr_t _sctph, *sh; 397 sctp_sctphdr_t _sctph, *sh;
409 sctp_chunkhdr_t _sch, *sch; 398 sctp_chunkhdr_t _sch, *sch;
410 u_int32_t offset, count; 399 u_int32_t offset, count;
411 char map[256 / sizeof (char)] = {0}; 400 unsigned long map[256 / sizeof(unsigned long)] = { 0 };
412 401
413 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); 402 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
414 if (sh == NULL) 403 if (sh == NULL)
415 return 0; 404 return 0;
416 405
417 if (do_basic_checks(conntrack, skb, dataoff, map) != 0) 406 if (do_basic_checks(ct, skb, dataoff, map) != 0)
418 return 0; 407 return 0;
419 408
420 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ 409 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
421 if ((test_bit (SCTP_CID_ABORT, (void *)map)) 410 if (test_bit(SCTP_CID_ABORT, map) ||
422 || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) 411 test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
423 || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { 412 test_bit(SCTP_CID_COOKIE_ACK, map))
424 return 0; 413 return 0;
425 }
426 414
427 newconntrack = SCTP_CONNTRACK_MAX; 415 new_state = SCTP_CONNTRACK_MAX;
428 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 416 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
429 /* Don't need lock here: this conntrack not in circulation yet */ 417 /* Don't need lock here: this conntrack not in circulation yet */
430 newconntrack = new_state(IP_CT_DIR_ORIGINAL, 418 new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
431 SCTP_CONNTRACK_NONE, sch->type); 419 SCTP_CONNTRACK_NONE, sch->type);
432 420
433 /* Invalid: delete conntrack */ 421 /* Invalid: delete conntrack */
434 if (newconntrack == SCTP_CONNTRACK_NONE || 422 if (new_state == SCTP_CONNTRACK_NONE ||
435 newconntrack == SCTP_CONNTRACK_MAX) { 423 new_state == SCTP_CONNTRACK_MAX) {
436 pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); 424 pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
437 return 0; 425 return 0;
438 } 426 }
@@ -450,7 +438,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
450 pr_debug("Setting vtag %x for new conn\n", 438 pr_debug("Setting vtag %x for new conn\n",
451 ih->init_tag); 439 ih->init_tag);
452 440
453 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = 441 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
454 ih->init_tag; 442 ih->init_tag;
455 } else { 443 } else {
456 /* Sec 8.5.1 (A) */ 444 /* Sec 8.5.1 (A) */
@@ -462,10 +450,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
462 else { 450 else {
463 pr_debug("Setting vtag %x for new conn OOTB\n", 451 pr_debug("Setting vtag %x for new conn OOTB\n",
464 sh->vtag); 452 sh->vtag);
465 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; 453 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
466 } 454 }
467 455
468 conntrack->proto.sctp.state = newconntrack; 456 ct->proto.sctp.state = new_state;
469 } 457 }
470 458
471 return 1; 459 return 1;
@@ -477,49 +465,49 @@ static struct ctl_table_header *sctp_sysctl_header;
477static struct ctl_table sctp_sysctl_table[] = { 465static struct ctl_table sctp_sysctl_table[] = {
478 { 466 {
479 .procname = "nf_conntrack_sctp_timeout_closed", 467 .procname = "nf_conntrack_sctp_timeout_closed",
480 .data = &nf_ct_sctp_timeout_closed, 468 .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
481 .maxlen = sizeof(unsigned int), 469 .maxlen = sizeof(unsigned int),
482 .mode = 0644, 470 .mode = 0644,
483 .proc_handler = &proc_dointvec_jiffies, 471 .proc_handler = &proc_dointvec_jiffies,
484 }, 472 },
485 { 473 {
486 .procname = "nf_conntrack_sctp_timeout_cookie_wait", 474 .procname = "nf_conntrack_sctp_timeout_cookie_wait",
487 .data = &nf_ct_sctp_timeout_cookie_wait, 475 .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
488 .maxlen = sizeof(unsigned int), 476 .maxlen = sizeof(unsigned int),
489 .mode = 0644, 477 .mode = 0644,
490 .proc_handler = &proc_dointvec_jiffies, 478 .proc_handler = &proc_dointvec_jiffies,
491 }, 479 },
492 { 480 {
493 .procname = "nf_conntrack_sctp_timeout_cookie_echoed", 481 .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
494 .data = &nf_ct_sctp_timeout_cookie_echoed, 482 .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
495 .maxlen = sizeof(unsigned int), 483 .maxlen = sizeof(unsigned int),
496 .mode = 0644, 484 .mode = 0644,
497 .proc_handler = &proc_dointvec_jiffies, 485 .proc_handler = &proc_dointvec_jiffies,
498 }, 486 },
499 { 487 {
500 .procname = "nf_conntrack_sctp_timeout_established", 488 .procname = "nf_conntrack_sctp_timeout_established",
501 .data = &nf_ct_sctp_timeout_established, 489 .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
502 .maxlen = sizeof(unsigned int), 490 .maxlen = sizeof(unsigned int),
503 .mode = 0644, 491 .mode = 0644,
504 .proc_handler = &proc_dointvec_jiffies, 492 .proc_handler = &proc_dointvec_jiffies,
505 }, 493 },
506 { 494 {
507 .procname = "nf_conntrack_sctp_timeout_shutdown_sent", 495 .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
508 .data = &nf_ct_sctp_timeout_shutdown_sent, 496 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
509 .maxlen = sizeof(unsigned int), 497 .maxlen = sizeof(unsigned int),
510 .mode = 0644, 498 .mode = 0644,
511 .proc_handler = &proc_dointvec_jiffies, 499 .proc_handler = &proc_dointvec_jiffies,
512 }, 500 },
513 { 501 {
514 .procname = "nf_conntrack_sctp_timeout_shutdown_recd", 502 .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
515 .data = &nf_ct_sctp_timeout_shutdown_recd, 503 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
516 .maxlen = sizeof(unsigned int), 504 .maxlen = sizeof(unsigned int),
517 .mode = 0644, 505 .mode = 0644,
518 .proc_handler = &proc_dointvec_jiffies, 506 .proc_handler = &proc_dointvec_jiffies,
519 }, 507 },
520 { 508 {
521 .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", 509 .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
522 .data = &nf_ct_sctp_timeout_shutdown_ack_sent, 510 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
523 .maxlen = sizeof(unsigned int), 511 .maxlen = sizeof(unsigned int),
524 .mode = 0644, 512 .mode = 0644,
525 .proc_handler = &proc_dointvec_jiffies, 513 .proc_handler = &proc_dointvec_jiffies,
@@ -533,49 +521,49 @@ static struct ctl_table sctp_sysctl_table[] = {
533static struct ctl_table sctp_compat_sysctl_table[] = { 521static struct ctl_table sctp_compat_sysctl_table[] = {
534 { 522 {
535 .procname = "ip_conntrack_sctp_timeout_closed", 523 .procname = "ip_conntrack_sctp_timeout_closed",
536 .data = &nf_ct_sctp_timeout_closed, 524 .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
537 .maxlen = sizeof(unsigned int), 525 .maxlen = sizeof(unsigned int),
538 .mode = 0644, 526 .mode = 0644,
539 .proc_handler = &proc_dointvec_jiffies, 527 .proc_handler = &proc_dointvec_jiffies,
540 }, 528 },
541 { 529 {
542 .procname = "ip_conntrack_sctp_timeout_cookie_wait", 530 .procname = "ip_conntrack_sctp_timeout_cookie_wait",
543 .data = &nf_ct_sctp_timeout_cookie_wait, 531 .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
544 .maxlen = sizeof(unsigned int), 532 .maxlen = sizeof(unsigned int),
545 .mode = 0644, 533 .mode = 0644,
546 .proc_handler = &proc_dointvec_jiffies, 534 .proc_handler = &proc_dointvec_jiffies,
547 }, 535 },
548 { 536 {
549 .procname = "ip_conntrack_sctp_timeout_cookie_echoed", 537 .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
550 .data = &nf_ct_sctp_timeout_cookie_echoed, 538 .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
551 .maxlen = sizeof(unsigned int), 539 .maxlen = sizeof(unsigned int),
552 .mode = 0644, 540 .mode = 0644,
553 .proc_handler = &proc_dointvec_jiffies, 541 .proc_handler = &proc_dointvec_jiffies,
554 }, 542 },
555 { 543 {
556 .procname = "ip_conntrack_sctp_timeout_established", 544 .procname = "ip_conntrack_sctp_timeout_established",
557 .data = &nf_ct_sctp_timeout_established, 545 .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
558 .maxlen = sizeof(unsigned int), 546 .maxlen = sizeof(unsigned int),
559 .mode = 0644, 547 .mode = 0644,
560 .proc_handler = &proc_dointvec_jiffies, 548 .proc_handler = &proc_dointvec_jiffies,
561 }, 549 },
562 { 550 {
563 .procname = "ip_conntrack_sctp_timeout_shutdown_sent", 551 .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
564 .data = &nf_ct_sctp_timeout_shutdown_sent, 552 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
565 .maxlen = sizeof(unsigned int), 553 .maxlen = sizeof(unsigned int),
566 .mode = 0644, 554 .mode = 0644,
567 .proc_handler = &proc_dointvec_jiffies, 555 .proc_handler = &proc_dointvec_jiffies,
568 }, 556 },
569 { 557 {
570 .procname = "ip_conntrack_sctp_timeout_shutdown_recd", 558 .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
571 .data = &nf_ct_sctp_timeout_shutdown_recd, 559 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
572 .maxlen = sizeof(unsigned int), 560 .maxlen = sizeof(unsigned int),
573 .mode = 0644, 561 .mode = 0644,
574 .proc_handler = &proc_dointvec_jiffies, 562 .proc_handler = &proc_dointvec_jiffies,
575 }, 563 },
576 { 564 {
577 .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", 565 .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
578 .data = &nf_ct_sctp_timeout_shutdown_ack_sent, 566 .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
579 .maxlen = sizeof(unsigned int), 567 .maxlen = sizeof(unsigned int),
580 .mode = 0644, 568 .mode = 0644,
581 .proc_handler = &proc_dointvec_jiffies, 569 .proc_handler = &proc_dointvec_jiffies,
@@ -598,6 +586,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
598 .packet = sctp_packet, 586 .packet = sctp_packet,
599 .new = sctp_new, 587 .new = sctp_new,
600 .me = THIS_MODULE, 588 .me = THIS_MODULE,
589#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
590 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
591 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
592 .nla_policy = nf_ct_port_nla_policy,
593#endif
601#ifdef CONFIG_SYSCTL 594#ifdef CONFIG_SYSCTL
602 .ctl_table_users = &sctp_sysctl_table_users, 595 .ctl_table_users = &sctp_sysctl_table_users,
603 .ctl_table_header = &sctp_sysctl_header, 596 .ctl_table_header = &sctp_sysctl_header,
@@ -619,6 +612,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
619 .packet = sctp_packet, 612 .packet = sctp_packet,
620 .new = sctp_new, 613 .new = sctp_new,
621 .me = THIS_MODULE, 614 .me = THIS_MODULE,
615#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
616 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
617 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
618 .nla_policy = nf_ct_port_nla_policy,
619#endif
622#ifdef CONFIG_SYSCTL 620#ifdef CONFIG_SYSCTL
623 .ctl_table_users = &sctp_sysctl_table_users, 621 .ctl_table_users = &sctp_sysctl_table_users,
624 .ctl_table_header = &sctp_sysctl_header, 622 .ctl_table_header = &sctp_sysctl_header,
@@ -626,7 +624,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
626#endif 624#endif
627}; 625};
628 626
629int __init nf_conntrack_proto_sctp_init(void) 627static int __init nf_conntrack_proto_sctp_init(void)
630{ 628{
631 int ret; 629 int ret;
632 630
@@ -649,7 +647,7 @@ int __init nf_conntrack_proto_sctp_init(void)
649 return ret; 647 return ret;
650} 648}
651 649
652void __exit nf_conntrack_proto_sctp_fini(void) 650static void __exit nf_conntrack_proto_sctp_fini(void)
653{ 651{
654 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); 652 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
655 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); 653 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7a3f64c1aca6..3e0cccae5636 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -24,8 +24,9 @@
24#include <net/netfilter/nf_conntrack.h> 24#include <net/netfilter/nf_conntrack.h>
25#include <net/netfilter/nf_conntrack_l4proto.h> 25#include <net/netfilter/nf_conntrack_l4proto.h>
26#include <net/netfilter/nf_conntrack_ecache.h> 26#include <net/netfilter/nf_conntrack_ecache.h>
27#include <net/netfilter/nf_log.h>
27 28
28/* Protects conntrack->proto.tcp */ 29/* Protects ct->proto.tcp */
29static DEFINE_RWLOCK(tcp_lock); 30static DEFINE_RWLOCK(tcp_lock);
30 31
31/* "Be conservative in what you do, 32/* "Be conservative in what you do,
@@ -45,7 +46,7 @@ static int nf_ct_tcp_max_retrans __read_mostly = 3;
45 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 46 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
46 closely. They're more complex. --RR */ 47 closely. They're more complex. --RR */
47 48
48static const char *tcp_conntrack_names[] = { 49static const char *const tcp_conntrack_names[] = {
49 "NONE", 50 "NONE",
50 "SYN_SENT", 51 "SYN_SENT",
51 "SYN_RECV", 52 "SYN_RECV",
@@ -63,32 +64,21 @@ static const char *tcp_conntrack_names[] = {
63#define HOURS * 60 MINS 64#define HOURS * 60 MINS
64#define DAYS * 24 HOURS 65#define DAYS * 24 HOURS
65 66
66static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
67static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
68static unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
69static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
70static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
71static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
72static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
73static unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
74
75/* RFC1122 says the R2 limit should be at least 100 seconds. 67/* RFC1122 says the R2 limit should be at least 100 seconds.
76 Linux uses 15 packets as limit, which corresponds 68 Linux uses 15 packets as limit, which corresponds
77 to ~13-30min depending on RTO. */ 69 to ~13-30min depending on RTO. */
78static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; 70static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
79 71
80static unsigned int * tcp_timeouts[] = { 72static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
81 NULL, /* TCP_CONNTRACK_NONE */ 73 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
82 &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ 74 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
83 &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ 75 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
84 &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ 76 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
85 &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ 77 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
86 &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ 78 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
87 &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ 79 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
88 &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ 80 [TCP_CONNTRACK_CLOSE] = 10 SECS,
89 &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ 81};
90 NULL, /* TCP_CONNTRACK_LISTEN */
91 };
92 82
93#define sNO TCP_CONNTRACK_NONE 83#define sNO TCP_CONNTRACK_NONE
94#define sSS TCP_CONNTRACK_SYN_SENT 84#define sSS TCP_CONNTRACK_SYN_SENT
@@ -148,7 +138,7 @@ enum tcp_bit_set {
148 * if they are invalid 138 * if they are invalid
149 * or we do not support the request (simultaneous open) 139 * or we do not support the request (simultaneous open)
150 */ 140 */
151static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { 141static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
152 { 142 {
153/* ORIGINAL */ 143/* ORIGINAL */
154/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 144/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
@@ -271,7 +261,8 @@ static int tcp_pkt_to_tuple(const struct sk_buff *skb,
271 unsigned int dataoff, 261 unsigned int dataoff,
272 struct nf_conntrack_tuple *tuple) 262 struct nf_conntrack_tuple *tuple)
273{ 263{
274 struct tcphdr _hdr, *hp; 264 const struct tcphdr *hp;
265 struct tcphdr _hdr;
275 266
276 /* Actually only need first 8 bytes. */ 267 /* Actually only need first 8 bytes. */
277 hp = skb_header_pointer(skb, dataoff, 8, &_hdr); 268 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
@@ -302,13 +293,12 @@ static int tcp_print_tuple(struct seq_file *s,
302} 293}
303 294
304/* Print out the private part of the conntrack. */ 295/* Print out the private part of the conntrack. */
305static int tcp_print_conntrack(struct seq_file *s, 296static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
306 const struct nf_conn *conntrack)
307{ 297{
308 enum tcp_conntrack state; 298 enum tcp_conntrack state;
309 299
310 read_lock_bh(&tcp_lock); 300 read_lock_bh(&tcp_lock);
311 state = conntrack->proto.tcp.state; 301 state = ct->proto.tcp.state;
312 read_unlock_bh(&tcp_lock); 302 read_unlock_bh(&tcp_lock);
313 303
314 return seq_printf(s, "%s ", tcp_conntrack_names[state]); 304 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
@@ -354,7 +344,7 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
354static inline __u32 segment_seq_plus_len(__u32 seq, 344static inline __u32 segment_seq_plus_len(__u32 seq,
355 size_t len, 345 size_t len,
356 unsigned int dataoff, 346 unsigned int dataoff,
357 struct tcphdr *tcph) 347 const struct tcphdr *tcph)
358{ 348{
359 /* XXX Should I use payload length field in IP/IPv6 header ? 349 /* XXX Should I use payload length field in IP/IPv6 header ?
360 * - YK */ 350 * - YK */
@@ -373,11 +363,11 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
373 */ 363 */
374static void tcp_options(const struct sk_buff *skb, 364static void tcp_options(const struct sk_buff *skb,
375 unsigned int dataoff, 365 unsigned int dataoff,
376 struct tcphdr *tcph, 366 const struct tcphdr *tcph,
377 struct ip_ct_tcp_state *state) 367 struct ip_ct_tcp_state *state)
378{ 368{
379 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 369 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
380 unsigned char *ptr; 370 const unsigned char *ptr;
381 int length = (tcph->doff*4) - sizeof(struct tcphdr); 371 int length = (tcph->doff*4) - sizeof(struct tcphdr);
382 372
383 if (!length) 373 if (!length)
@@ -428,10 +418,10 @@ static void tcp_options(const struct sk_buff *skb,
428} 418}
429 419
430static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, 420static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
431 struct tcphdr *tcph, __u32 *sack) 421 const struct tcphdr *tcph, __u32 *sack)
432{ 422{
433 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 423 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
434 unsigned char *ptr; 424 const unsigned char *ptr;
435 int length = (tcph->doff*4) - sizeof(struct tcphdr); 425 int length = (tcph->doff*4) - sizeof(struct tcphdr);
436 __u32 tmp; 426 __u32 tmp;
437 427
@@ -488,18 +478,18 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
488 } 478 }
489} 479}
490 480
491static int tcp_in_window(struct nf_conn *ct, 481static int tcp_in_window(const struct nf_conn *ct,
492 struct ip_ct_tcp *state, 482 struct ip_ct_tcp *state,
493 enum ip_conntrack_dir dir, 483 enum ip_conntrack_dir dir,
494 unsigned int index, 484 unsigned int index,
495 const struct sk_buff *skb, 485 const struct sk_buff *skb,
496 unsigned int dataoff, 486 unsigned int dataoff,
497 struct tcphdr *tcph, 487 const struct tcphdr *tcph,
498 int pf) 488 int pf)
499{ 489{
500 struct ip_ct_tcp_state *sender = &state->seen[dir]; 490 struct ip_ct_tcp_state *sender = &state->seen[dir];
501 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 491 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
502 struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; 492 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
503 __u32 seq, ack, sack, end, win, swin; 493 __u32 seq, ack, sack, end, win, swin;
504 int res; 494 int res;
505 495
@@ -697,14 +687,14 @@ static int tcp_in_window(struct nf_conn *ct,
697#ifdef CONFIG_NF_NAT_NEEDED 687#ifdef CONFIG_NF_NAT_NEEDED
698/* Update sender->td_end after NAT successfully mangled the packet */ 688/* Update sender->td_end after NAT successfully mangled the packet */
699/* Caller must linearize skb at tcp header. */ 689/* Caller must linearize skb at tcp header. */
700void nf_conntrack_tcp_update(struct sk_buff *skb, 690void nf_conntrack_tcp_update(const struct sk_buff *skb,
701 unsigned int dataoff, 691 unsigned int dataoff,
702 struct nf_conn *conntrack, 692 struct nf_conn *ct,
703 int dir) 693 int dir)
704{ 694{
705 struct tcphdr *tcph = (void *)skb->data + dataoff; 695 const struct tcphdr *tcph = (const void *)skb->data + dataoff;
706 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; 696 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
707 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; 697 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
708 __u32 end; 698 __u32 end;
709 699
710 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); 700 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
@@ -713,9 +703,9 @@ void nf_conntrack_tcp_update(struct sk_buff *skb,
713 /* 703 /*
714 * We have to worry for the ack in the reply packet only... 704 * We have to worry for the ack in the reply packet only...
715 */ 705 */
716 if (after(end, conntrack->proto.tcp.seen[dir].td_end)) 706 if (after(end, ct->proto.tcp.seen[dir].td_end))
717 conntrack->proto.tcp.seen[dir].td_end = end; 707 ct->proto.tcp.seen[dir].td_end = end;
718 conntrack->proto.tcp.last_end = end; 708 ct->proto.tcp.last_end = end;
719 write_unlock_bh(&tcp_lock); 709 write_unlock_bh(&tcp_lock);
720 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " 710 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
721 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 711 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
@@ -737,7 +727,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
737#define TH_CWR 0x80 727#define TH_CWR 0x80
738 728
739/* table of valid flag combinations - PUSH, ECE and CWR are always valid */ 729/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
740static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = 730static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
741{ 731{
742 [TH_SYN] = 1, 732 [TH_SYN] = 1,
743 [TH_SYN|TH_URG] = 1, 733 [TH_SYN|TH_URG] = 1,
@@ -757,7 +747,8 @@ static int tcp_error(struct sk_buff *skb,
757 int pf, 747 int pf,
758 unsigned int hooknum) 748 unsigned int hooknum)
759{ 749{
760 struct tcphdr _tcph, *th; 750 const struct tcphdr *th;
751 struct tcphdr _tcph;
761 unsigned int tcplen = skb->len - dataoff; 752 unsigned int tcplen = skb->len - dataoff;
762 u_int8_t tcpflags; 753 u_int8_t tcpflags;
763 754
@@ -783,9 +774,7 @@ static int tcp_error(struct sk_buff *skb,
783 * because the checksum is assumed to be correct. 774 * because the checksum is assumed to be correct.
784 */ 775 */
785 /* FIXME: Source route IP option packets --RR */ 776 /* FIXME: Source route IP option packets --RR */
786 if (nf_conntrack_checksum && 777 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
787 ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
788 (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
789 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 778 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
790 if (LOG_INVALID(IPPROTO_TCP)) 779 if (LOG_INVALID(IPPROTO_TCP))
791 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 780 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -806,7 +795,7 @@ static int tcp_error(struct sk_buff *skb,
806} 795}
807 796
808/* Returns verdict for packet, or -1 for invalid. */ 797/* Returns verdict for packet, or -1 for invalid. */
809static int tcp_packet(struct nf_conn *conntrack, 798static int tcp_packet(struct nf_conn *ct,
810 const struct sk_buff *skb, 799 const struct sk_buff *skb,
811 unsigned int dataoff, 800 unsigned int dataoff,
812 enum ip_conntrack_info ctinfo, 801 enum ip_conntrack_info ctinfo,
@@ -816,7 +805,8 @@ static int tcp_packet(struct nf_conn *conntrack,
816 struct nf_conntrack_tuple *tuple; 805 struct nf_conntrack_tuple *tuple;
817 enum tcp_conntrack new_state, old_state; 806 enum tcp_conntrack new_state, old_state;
818 enum ip_conntrack_dir dir; 807 enum ip_conntrack_dir dir;
819 struct tcphdr *th, _tcph; 808 const struct tcphdr *th;
809 struct tcphdr _tcph;
820 unsigned long timeout; 810 unsigned long timeout;
821 unsigned int index; 811 unsigned int index;
822 812
@@ -824,26 +814,24 @@ static int tcp_packet(struct nf_conn *conntrack,
824 BUG_ON(th == NULL); 814 BUG_ON(th == NULL);
825 815
826 write_lock_bh(&tcp_lock); 816 write_lock_bh(&tcp_lock);
827 old_state = conntrack->proto.tcp.state; 817 old_state = ct->proto.tcp.state;
828 dir = CTINFO2DIR(ctinfo); 818 dir = CTINFO2DIR(ctinfo);
829 index = get_conntrack_index(th); 819 index = get_conntrack_index(th);
830 new_state = tcp_conntracks[dir][index][old_state]; 820 new_state = tcp_conntracks[dir][index][old_state];
831 tuple = &conntrack->tuplehash[dir].tuple; 821 tuple = &ct->tuplehash[dir].tuple;
832 822
833 switch (new_state) { 823 switch (new_state) {
834 case TCP_CONNTRACK_SYN_SENT: 824 case TCP_CONNTRACK_SYN_SENT:
835 if (old_state < TCP_CONNTRACK_TIME_WAIT) 825 if (old_state < TCP_CONNTRACK_TIME_WAIT)
836 break; 826 break;
837 if ((conntrack->proto.tcp.seen[!dir].flags & 827 if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_CLOSE_INIT)
838 IP_CT_TCP_FLAG_CLOSE_INIT) 828 || (ct->proto.tcp.last_dir == dir
839 || (conntrack->proto.tcp.last_dir == dir 829 && ct->proto.tcp.last_index == TCP_RST_SET)) {
840 && conntrack->proto.tcp.last_index == TCP_RST_SET)) {
841 /* Attempt to reopen a closed/aborted connection. 830 /* Attempt to reopen a closed/aborted connection.
842 * Delete this connection and look up again. */ 831 * Delete this connection and look up again. */
843 write_unlock_bh(&tcp_lock); 832 write_unlock_bh(&tcp_lock);
844 if (del_timer(&conntrack->timeout)) 833 if (del_timer(&ct->timeout))
845 conntrack->timeout.function((unsigned long) 834 ct->timeout.function((unsigned long)ct);
846 conntrack);
847 return -NF_REPEAT; 835 return -NF_REPEAT;
848 } 836 }
849 /* Fall through */ 837 /* Fall through */
@@ -855,10 +843,9 @@ static int tcp_packet(struct nf_conn *conntrack,
855 * c) ACK in reply direction after initial SYN in original. 843 * c) ACK in reply direction after initial SYN in original.
856 */ 844 */
857 if (index == TCP_SYNACK_SET 845 if (index == TCP_SYNACK_SET
858 && conntrack->proto.tcp.last_index == TCP_SYN_SET 846 && ct->proto.tcp.last_index == TCP_SYN_SET
859 && conntrack->proto.tcp.last_dir != dir 847 && ct->proto.tcp.last_dir != dir
860 && ntohl(th->ack_seq) == 848 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
861 conntrack->proto.tcp.last_end) {
862 /* This SYN/ACK acknowledges a SYN that we earlier 849 /* This SYN/ACK acknowledges a SYN that we earlier
863 * ignored as invalid. This means that the client and 850 * ignored as invalid. This means that the client and
864 * the server are both in sync, while the firewall is 851 * the server are both in sync, while the firewall is
@@ -870,15 +857,14 @@ static int tcp_packet(struct nf_conn *conntrack,
870 if (LOG_INVALID(IPPROTO_TCP)) 857 if (LOG_INVALID(IPPROTO_TCP))
871 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 858 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
872 "nf_ct_tcp: killing out of sync session "); 859 "nf_ct_tcp: killing out of sync session ");
873 if (del_timer(&conntrack->timeout)) 860 if (del_timer(&ct->timeout))
874 conntrack->timeout.function((unsigned long) 861 ct->timeout.function((unsigned long)ct);
875 conntrack);
876 return -NF_DROP; 862 return -NF_DROP;
877 } 863 }
878 conntrack->proto.tcp.last_index = index; 864 ct->proto.tcp.last_index = index;
879 conntrack->proto.tcp.last_dir = dir; 865 ct->proto.tcp.last_dir = dir;
880 conntrack->proto.tcp.last_seq = ntohl(th->seq); 866 ct->proto.tcp.last_seq = ntohl(th->seq);
881 conntrack->proto.tcp.last_end = 867 ct->proto.tcp.last_end =
882 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); 868 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
883 869
884 write_unlock_bh(&tcp_lock); 870 write_unlock_bh(&tcp_lock);
@@ -897,11 +883,11 @@ static int tcp_packet(struct nf_conn *conntrack,
897 return -NF_ACCEPT; 883 return -NF_ACCEPT;
898 case TCP_CONNTRACK_CLOSE: 884 case TCP_CONNTRACK_CLOSE:
899 if (index == TCP_RST_SET 885 if (index == TCP_RST_SET
900 && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) 886 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
901 && conntrack->proto.tcp.last_index == TCP_SYN_SET) 887 && ct->proto.tcp.last_index == TCP_SYN_SET)
902 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 888 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
903 && conntrack->proto.tcp.last_index == TCP_ACK_SET)) 889 && ct->proto.tcp.last_index == TCP_ACK_SET))
904 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { 890 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
905 /* RST sent to invalid SYN or ACK we had let through 891 /* RST sent to invalid SYN or ACK we had let through
906 * at a) and c) above: 892 * at a) and c) above:
907 * 893 *
@@ -919,15 +905,15 @@ static int tcp_packet(struct nf_conn *conntrack,
919 break; 905 break;
920 } 906 }
921 907
922 if (!tcp_in_window(conntrack, &conntrack->proto.tcp, dir, index, 908 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
923 skb, dataoff, th, pf)) { 909 skb, dataoff, th, pf)) {
924 write_unlock_bh(&tcp_lock); 910 write_unlock_bh(&tcp_lock);
925 return -NF_ACCEPT; 911 return -NF_ACCEPT;
926 } 912 }
927 in_window: 913 in_window:
928 /* From now on we have got in-window packets */ 914 /* From now on we have got in-window packets */
929 conntrack->proto.tcp.last_index = index; 915 ct->proto.tcp.last_index = index;
930 conntrack->proto.tcp.last_dir = dir; 916 ct->proto.tcp.last_dir = dir;
931 917
932 pr_debug("tcp_conntracks: "); 918 pr_debug("tcp_conntracks: ");
933 NF_CT_DUMP_TUPLE(tuple); 919 NF_CT_DUMP_TUPLE(tuple);
@@ -936,55 +922,55 @@ static int tcp_packet(struct nf_conn *conntrack,
936 (th->fin ? 1 : 0), (th->rst ? 1 : 0), 922 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
937 old_state, new_state); 923 old_state, new_state);
938 924
939 conntrack->proto.tcp.state = new_state; 925 ct->proto.tcp.state = new_state;
940 if (old_state != new_state 926 if (old_state != new_state
941 && (new_state == TCP_CONNTRACK_FIN_WAIT 927 && (new_state == TCP_CONNTRACK_FIN_WAIT
942 || new_state == TCP_CONNTRACK_CLOSE)) 928 || new_state == TCP_CONNTRACK_CLOSE))
943 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 929 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
944 timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans 930 timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans
945 && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans 931 && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
946 ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; 932 ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
947 write_unlock_bh(&tcp_lock); 933 write_unlock_bh(&tcp_lock);
948 934
949 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); 935 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
950 if (new_state != old_state) 936 if (new_state != old_state)
951 nf_conntrack_event_cache(IPCT_PROTOINFO, skb); 937 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
952 938
953 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { 939 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
954 /* If only reply is a RST, we can consider ourselves not to 940 /* If only reply is a RST, we can consider ourselves not to
955 have an established connection: this is a fairly common 941 have an established connection: this is a fairly common
956 problem case, so we can delete the conntrack 942 problem case, so we can delete the conntrack
957 immediately. --RR */ 943 immediately. --RR */
958 if (th->rst) { 944 if (th->rst) {
959 if (del_timer(&conntrack->timeout)) 945 if (del_timer(&ct->timeout))
960 conntrack->timeout.function((unsigned long) 946 ct->timeout.function((unsigned long)ct);
961 conntrack);
962 return NF_ACCEPT; 947 return NF_ACCEPT;
963 } 948 }
964 } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 949 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
965 && (old_state == TCP_CONNTRACK_SYN_RECV 950 && (old_state == TCP_CONNTRACK_SYN_RECV
966 || old_state == TCP_CONNTRACK_ESTABLISHED) 951 || old_state == TCP_CONNTRACK_ESTABLISHED)
967 && new_state == TCP_CONNTRACK_ESTABLISHED) { 952 && new_state == TCP_CONNTRACK_ESTABLISHED) {
968 /* Set ASSURED if we see see valid ack in ESTABLISHED 953 /* Set ASSURED if we see see valid ack in ESTABLISHED
969 after SYN_RECV or a valid answer for a picked up 954 after SYN_RECV or a valid answer for a picked up
970 connection. */ 955 connection. */
971 set_bit(IPS_ASSURED_BIT, &conntrack->status); 956 set_bit(IPS_ASSURED_BIT, &ct->status);
972 nf_conntrack_event_cache(IPCT_STATUS, skb); 957 nf_conntrack_event_cache(IPCT_STATUS, skb);
973 } 958 }
974 nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); 959 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
975 960
976 return NF_ACCEPT; 961 return NF_ACCEPT;
977} 962}
978 963
979/* Called when a new connection for this protocol found. */ 964/* Called when a new connection for this protocol found. */
980static int tcp_new(struct nf_conn *conntrack, 965static int tcp_new(struct nf_conn *ct,
981 const struct sk_buff *skb, 966 const struct sk_buff *skb,
982 unsigned int dataoff) 967 unsigned int dataoff)
983{ 968{
984 enum tcp_conntrack new_state; 969 enum tcp_conntrack new_state;
985 struct tcphdr *th, _tcph; 970 const struct tcphdr *th;
986 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; 971 struct tcphdr _tcph;
987 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; 972 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
973 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
988 974
989 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 975 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
990 BUG_ON(th == NULL); 976 BUG_ON(th == NULL);
@@ -1002,17 +988,17 @@ static int tcp_new(struct nf_conn *conntrack,
1002 988
1003 if (new_state == TCP_CONNTRACK_SYN_SENT) { 989 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1004 /* SYN packet */ 990 /* SYN packet */
1005 conntrack->proto.tcp.seen[0].td_end = 991 ct->proto.tcp.seen[0].td_end =
1006 segment_seq_plus_len(ntohl(th->seq), skb->len, 992 segment_seq_plus_len(ntohl(th->seq), skb->len,
1007 dataoff, th); 993 dataoff, th);
1008 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 994 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1009 if (conntrack->proto.tcp.seen[0].td_maxwin == 0) 995 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1010 conntrack->proto.tcp.seen[0].td_maxwin = 1; 996 ct->proto.tcp.seen[0].td_maxwin = 1;
1011 conntrack->proto.tcp.seen[0].td_maxend = 997 ct->proto.tcp.seen[0].td_maxend =
1012 conntrack->proto.tcp.seen[0].td_end; 998 ct->proto.tcp.seen[0].td_end;
1013 999
1014 tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); 1000 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1015 conntrack->proto.tcp.seen[1].flags = 0; 1001 ct->proto.tcp.seen[1].flags = 0;
1016 } else if (nf_ct_tcp_loose == 0) { 1002 } else if (nf_ct_tcp_loose == 0) {
1017 /* Don't try to pick up connections. */ 1003 /* Don't try to pick up connections. */
1018 return 0; 1004 return 0;
@@ -1022,32 +1008,32 @@ static int tcp_new(struct nf_conn *conntrack,
1022 * its history is lost for us. 1008 * its history is lost for us.
1023 * Let's try to use the data from the packet. 1009 * Let's try to use the data from the packet.
1024 */ 1010 */
1025 conntrack->proto.tcp.seen[0].td_end = 1011 ct->proto.tcp.seen[0].td_end =
1026 segment_seq_plus_len(ntohl(th->seq), skb->len, 1012 segment_seq_plus_len(ntohl(th->seq), skb->len,
1027 dataoff, th); 1013 dataoff, th);
1028 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1014 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1029 if (conntrack->proto.tcp.seen[0].td_maxwin == 0) 1015 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1030 conntrack->proto.tcp.seen[0].td_maxwin = 1; 1016 ct->proto.tcp.seen[0].td_maxwin = 1;
1031 conntrack->proto.tcp.seen[0].td_maxend = 1017 ct->proto.tcp.seen[0].td_maxend =
1032 conntrack->proto.tcp.seen[0].td_end + 1018 ct->proto.tcp.seen[0].td_end +
1033 conntrack->proto.tcp.seen[0].td_maxwin; 1019 ct->proto.tcp.seen[0].td_maxwin;
1034 conntrack->proto.tcp.seen[0].td_scale = 0; 1020 ct->proto.tcp.seen[0].td_scale = 0;
1035 1021
1036 /* We assume SACK and liberal window checking to handle 1022 /* We assume SACK and liberal window checking to handle
1037 * window scaling */ 1023 * window scaling */
1038 conntrack->proto.tcp.seen[0].flags = 1024 ct->proto.tcp.seen[0].flags =
1039 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | 1025 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1040 IP_CT_TCP_FLAG_BE_LIBERAL; 1026 IP_CT_TCP_FLAG_BE_LIBERAL;
1041 } 1027 }
1042 1028
1043 conntrack->proto.tcp.seen[1].td_end = 0; 1029 ct->proto.tcp.seen[1].td_end = 0;
1044 conntrack->proto.tcp.seen[1].td_maxend = 0; 1030 ct->proto.tcp.seen[1].td_maxend = 0;
1045 conntrack->proto.tcp.seen[1].td_maxwin = 1; 1031 ct->proto.tcp.seen[1].td_maxwin = 1;
1046 conntrack->proto.tcp.seen[1].td_scale = 0; 1032 ct->proto.tcp.seen[1].td_scale = 0;
1047 1033
1048 /* tcp_packet will set them */ 1034 /* tcp_packet will set them */
1049 conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; 1035 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1050 conntrack->proto.tcp.last_index = TCP_NONE_SET; 1036 ct->proto.tcp.last_index = TCP_NONE_SET;
1051 1037
1052 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1038 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1053 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 1039 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
@@ -1074,14 +1060,13 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1074 if (!nest_parms) 1060 if (!nest_parms)
1075 goto nla_put_failure; 1061 goto nla_put_failure;
1076 1062
1077 NLA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), 1063 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
1078 &ct->proto.tcp.state);
1079 1064
1080 NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), 1065 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1081 &ct->proto.tcp.seen[0].td_scale); 1066 ct->proto.tcp.seen[0].td_scale);
1082 1067
1083 NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), 1068 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1084 &ct->proto.tcp.seen[1].td_scale); 1069 ct->proto.tcp.seen[1].td_scale);
1085 1070
1086 tmp.flags = ct->proto.tcp.seen[0].flags; 1071 tmp.flags = ct->proto.tcp.seen[0].flags;
1087 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 1072 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
@@ -1111,16 +1096,16 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1111 1096
1112static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) 1097static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1113{ 1098{
1114 struct nlattr *attr = cda[CTA_PROTOINFO_TCP]; 1099 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1115 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; 1100 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1116 int err; 1101 int err;
1117 1102
1118 /* updates could not contain anything about the private 1103 /* updates could not contain anything about the private
1119 * protocol info, in that case skip the parsing */ 1104 * protocol info, in that case skip the parsing */
1120 if (!attr) 1105 if (!pattr)
1121 return 0; 1106 return 0;
1122 1107
1123 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr, tcp_nla_policy); 1108 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1124 if (err < 0) 1109 if (err < 0)
1125 return err; 1110 return err;
1126 1111
@@ -1128,8 +1113,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1128 return -EINVAL; 1113 return -EINVAL;
1129 1114
1130 write_lock_bh(&tcp_lock); 1115 write_lock_bh(&tcp_lock);
1131 ct->proto.tcp.state = 1116 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1132 *(u_int8_t *)nla_data(tb[CTA_PROTOINFO_TCP_STATE]);
1133 1117
1134 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { 1118 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1135 struct nf_ct_tcp_flags *attr = 1119 struct nf_ct_tcp_flags *attr =
@@ -1149,10 +1133,10 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1149 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && 1133 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1150 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && 1134 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1151 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 1135 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1152 ct->proto.tcp.seen[0].td_scale = *(u_int8_t *) 1136 ct->proto.tcp.seen[0].td_scale =
1153 nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); 1137 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1154 ct->proto.tcp.seen[1].td_scale = *(u_int8_t *) 1138 ct->proto.tcp.seen[1].td_scale =
1155 nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); 1139 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1156 } 1140 }
1157 write_unlock_bh(&tcp_lock); 1141 write_unlock_bh(&tcp_lock);
1158 1142
@@ -1166,56 +1150,56 @@ static struct ctl_table_header *tcp_sysctl_header;
1166static struct ctl_table tcp_sysctl_table[] = { 1150static struct ctl_table tcp_sysctl_table[] = {
1167 { 1151 {
1168 .procname = "nf_conntrack_tcp_timeout_syn_sent", 1152 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1169 .data = &nf_ct_tcp_timeout_syn_sent, 1153 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1170 .maxlen = sizeof(unsigned int), 1154 .maxlen = sizeof(unsigned int),
1171 .mode = 0644, 1155 .mode = 0644,
1172 .proc_handler = &proc_dointvec_jiffies, 1156 .proc_handler = &proc_dointvec_jiffies,
1173 }, 1157 },
1174 { 1158 {
1175 .procname = "nf_conntrack_tcp_timeout_syn_recv", 1159 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1176 .data = &nf_ct_tcp_timeout_syn_recv, 1160 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1177 .maxlen = sizeof(unsigned int), 1161 .maxlen = sizeof(unsigned int),
1178 .mode = 0644, 1162 .mode = 0644,
1179 .proc_handler = &proc_dointvec_jiffies, 1163 .proc_handler = &proc_dointvec_jiffies,
1180 }, 1164 },
1181 { 1165 {
1182 .procname = "nf_conntrack_tcp_timeout_established", 1166 .procname = "nf_conntrack_tcp_timeout_established",
1183 .data = &nf_ct_tcp_timeout_established, 1167 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1184 .maxlen = sizeof(unsigned int), 1168 .maxlen = sizeof(unsigned int),
1185 .mode = 0644, 1169 .mode = 0644,
1186 .proc_handler = &proc_dointvec_jiffies, 1170 .proc_handler = &proc_dointvec_jiffies,
1187 }, 1171 },
1188 { 1172 {
1189 .procname = "nf_conntrack_tcp_timeout_fin_wait", 1173 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1190 .data = &nf_ct_tcp_timeout_fin_wait, 1174 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1191 .maxlen = sizeof(unsigned int), 1175 .maxlen = sizeof(unsigned int),
1192 .mode = 0644, 1176 .mode = 0644,
1193 .proc_handler = &proc_dointvec_jiffies, 1177 .proc_handler = &proc_dointvec_jiffies,
1194 }, 1178 },
1195 { 1179 {
1196 .procname = "nf_conntrack_tcp_timeout_close_wait", 1180 .procname = "nf_conntrack_tcp_timeout_close_wait",
1197 .data = &nf_ct_tcp_timeout_close_wait, 1181 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1198 .maxlen = sizeof(unsigned int), 1182 .maxlen = sizeof(unsigned int),
1199 .mode = 0644, 1183 .mode = 0644,
1200 .proc_handler = &proc_dointvec_jiffies, 1184 .proc_handler = &proc_dointvec_jiffies,
1201 }, 1185 },
1202 { 1186 {
1203 .procname = "nf_conntrack_tcp_timeout_last_ack", 1187 .procname = "nf_conntrack_tcp_timeout_last_ack",
1204 .data = &nf_ct_tcp_timeout_last_ack, 1188 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1205 .maxlen = sizeof(unsigned int), 1189 .maxlen = sizeof(unsigned int),
1206 .mode = 0644, 1190 .mode = 0644,
1207 .proc_handler = &proc_dointvec_jiffies, 1191 .proc_handler = &proc_dointvec_jiffies,
1208 }, 1192 },
1209 { 1193 {
1210 .procname = "nf_conntrack_tcp_timeout_time_wait", 1194 .procname = "nf_conntrack_tcp_timeout_time_wait",
1211 .data = &nf_ct_tcp_timeout_time_wait, 1195 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1212 .maxlen = sizeof(unsigned int), 1196 .maxlen = sizeof(unsigned int),
1213 .mode = 0644, 1197 .mode = 0644,
1214 .proc_handler = &proc_dointvec_jiffies, 1198 .proc_handler = &proc_dointvec_jiffies,
1215 }, 1199 },
1216 { 1200 {
1217 .procname = "nf_conntrack_tcp_timeout_close", 1201 .procname = "nf_conntrack_tcp_timeout_close",
1218 .data = &nf_ct_tcp_timeout_close, 1202 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1219 .maxlen = sizeof(unsigned int), 1203 .maxlen = sizeof(unsigned int),
1220 .mode = 0644, 1204 .mode = 0644,
1221 .proc_handler = &proc_dointvec_jiffies, 1205 .proc_handler = &proc_dointvec_jiffies,
@@ -1260,56 +1244,56 @@ static struct ctl_table tcp_sysctl_table[] = {
1260static struct ctl_table tcp_compat_sysctl_table[] = { 1244static struct ctl_table tcp_compat_sysctl_table[] = {
1261 { 1245 {
1262 .procname = "ip_conntrack_tcp_timeout_syn_sent", 1246 .procname = "ip_conntrack_tcp_timeout_syn_sent",
1263 .data = &nf_ct_tcp_timeout_syn_sent, 1247 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1264 .maxlen = sizeof(unsigned int), 1248 .maxlen = sizeof(unsigned int),
1265 .mode = 0644, 1249 .mode = 0644,
1266 .proc_handler = &proc_dointvec_jiffies, 1250 .proc_handler = &proc_dointvec_jiffies,
1267 }, 1251 },
1268 { 1252 {
1269 .procname = "ip_conntrack_tcp_timeout_syn_recv", 1253 .procname = "ip_conntrack_tcp_timeout_syn_recv",
1270 .data = &nf_ct_tcp_timeout_syn_recv, 1254 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1271 .maxlen = sizeof(unsigned int), 1255 .maxlen = sizeof(unsigned int),
1272 .mode = 0644, 1256 .mode = 0644,
1273 .proc_handler = &proc_dointvec_jiffies, 1257 .proc_handler = &proc_dointvec_jiffies,
1274 }, 1258 },
1275 { 1259 {
1276 .procname = "ip_conntrack_tcp_timeout_established", 1260 .procname = "ip_conntrack_tcp_timeout_established",
1277 .data = &nf_ct_tcp_timeout_established, 1261 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1278 .maxlen = sizeof(unsigned int), 1262 .maxlen = sizeof(unsigned int),
1279 .mode = 0644, 1263 .mode = 0644,
1280 .proc_handler = &proc_dointvec_jiffies, 1264 .proc_handler = &proc_dointvec_jiffies,
1281 }, 1265 },
1282 { 1266 {
1283 .procname = "ip_conntrack_tcp_timeout_fin_wait", 1267 .procname = "ip_conntrack_tcp_timeout_fin_wait",
1284 .data = &nf_ct_tcp_timeout_fin_wait, 1268 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1285 .maxlen = sizeof(unsigned int), 1269 .maxlen = sizeof(unsigned int),
1286 .mode = 0644, 1270 .mode = 0644,
1287 .proc_handler = &proc_dointvec_jiffies, 1271 .proc_handler = &proc_dointvec_jiffies,
1288 }, 1272 },
1289 { 1273 {
1290 .procname = "ip_conntrack_tcp_timeout_close_wait", 1274 .procname = "ip_conntrack_tcp_timeout_close_wait",
1291 .data = &nf_ct_tcp_timeout_close_wait, 1275 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1292 .maxlen = sizeof(unsigned int), 1276 .maxlen = sizeof(unsigned int),
1293 .mode = 0644, 1277 .mode = 0644,
1294 .proc_handler = &proc_dointvec_jiffies, 1278 .proc_handler = &proc_dointvec_jiffies,
1295 }, 1279 },
1296 { 1280 {
1297 .procname = "ip_conntrack_tcp_timeout_last_ack", 1281 .procname = "ip_conntrack_tcp_timeout_last_ack",
1298 .data = &nf_ct_tcp_timeout_last_ack, 1282 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1299 .maxlen = sizeof(unsigned int), 1283 .maxlen = sizeof(unsigned int),
1300 .mode = 0644, 1284 .mode = 0644,
1301 .proc_handler = &proc_dointvec_jiffies, 1285 .proc_handler = &proc_dointvec_jiffies,
1302 }, 1286 },
1303 { 1287 {
1304 .procname = "ip_conntrack_tcp_timeout_time_wait", 1288 .procname = "ip_conntrack_tcp_timeout_time_wait",
1305 .data = &nf_ct_tcp_timeout_time_wait, 1289 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1306 .maxlen = sizeof(unsigned int), 1290 .maxlen = sizeof(unsigned int),
1307 .mode = 0644, 1291 .mode = 0644,
1308 .proc_handler = &proc_dointvec_jiffies, 1292 .proc_handler = &proc_dointvec_jiffies,
1309 }, 1293 },
1310 { 1294 {
1311 .procname = "ip_conntrack_tcp_timeout_close", 1295 .procname = "ip_conntrack_tcp_timeout_close",
1312 .data = &nf_ct_tcp_timeout_close, 1296 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1313 .maxlen = sizeof(unsigned int), 1297 .maxlen = sizeof(unsigned int),
1314 .mode = 0644, 1298 .mode = 0644,
1315 .proc_handler = &proc_dointvec_jiffies, 1299 .proc_handler = &proc_dointvec_jiffies,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b3e7ecb080e6..b8a35cc06416 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -21,6 +21,7 @@
21#include <linux/netfilter_ipv6.h> 21#include <linux/netfilter_ipv6.h>
22#include <net/netfilter/nf_conntrack_l4proto.h> 22#include <net/netfilter/nf_conntrack_l4proto.h>
23#include <net/netfilter/nf_conntrack_ecache.h> 23#include <net/netfilter/nf_conntrack_ecache.h>
24#include <net/netfilter/nf_log.h>
24 25
25static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; 26static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
26static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; 27static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
@@ -29,7 +30,8 @@ static int udp_pkt_to_tuple(const struct sk_buff *skb,
29 unsigned int dataoff, 30 unsigned int dataoff,
30 struct nf_conntrack_tuple *tuple) 31 struct nf_conntrack_tuple *tuple)
31{ 32{
32 struct udphdr _hdr, *hp; 33 const struct udphdr *hp;
34 struct udphdr _hdr;
33 35
34 /* Actually only need first 8 bytes. */ 36 /* Actually only need first 8 bytes. */
35 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 37 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
@@ -59,15 +61,8 @@ static int udp_print_tuple(struct seq_file *s,
59 ntohs(tuple->dst.u.udp.port)); 61 ntohs(tuple->dst.u.udp.port));
60} 62}
61 63
62/* Print out the private part of the conntrack. */
63static int udp_print_conntrack(struct seq_file *s,
64 const struct nf_conn *conntrack)
65{
66 return 0;
67}
68
69/* Returns verdict for packet, and may modify conntracktype */ 64/* Returns verdict for packet, and may modify conntracktype */
70static int udp_packet(struct nf_conn *conntrack, 65static int udp_packet(struct nf_conn *ct,
71 const struct sk_buff *skb, 66 const struct sk_buff *skb,
72 unsigned int dataoff, 67 unsigned int dataoff,
73 enum ip_conntrack_info ctinfo, 68 enum ip_conntrack_info ctinfo,
@@ -76,20 +71,19 @@ static int udp_packet(struct nf_conn *conntrack,
76{ 71{
77 /* If we've seen traffic both ways, this is some kind of UDP 72 /* If we've seen traffic both ways, this is some kind of UDP
78 stream. Extend timeout. */ 73 stream. Extend timeout. */
79 if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { 74 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
80 nf_ct_refresh_acct(conntrack, ctinfo, skb, 75 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
81 nf_ct_udp_timeout_stream);
82 /* Also, more likely to be important, and not a probe */ 76 /* Also, more likely to be important, and not a probe */
83 if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) 77 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
84 nf_conntrack_event_cache(IPCT_STATUS, skb); 78 nf_conntrack_event_cache(IPCT_STATUS, skb);
85 } else 79 } else
86 nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); 80 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
87 81
88 return NF_ACCEPT; 82 return NF_ACCEPT;
89} 83}
90 84
91/* Called when a new connection for this protocol found. */ 85/* Called when a new connection for this protocol found. */
92static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb, 86static int udp_new(struct nf_conn *ct, const struct sk_buff *skb,
93 unsigned int dataoff) 87 unsigned int dataoff)
94{ 88{
95 return 1; 89 return 1;
@@ -101,7 +95,8 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
101 unsigned int hooknum) 95 unsigned int hooknum)
102{ 96{
103 unsigned int udplen = skb->len - dataoff; 97 unsigned int udplen = skb->len - dataoff;
104 struct udphdr _hdr, *hdr; 98 const struct udphdr *hdr;
99 struct udphdr _hdr;
105 100
106 /* Header is too small? */ 101 /* Header is too small? */
107 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 102 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
@@ -128,9 +123,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
128 * We skip checking packets on the outgoing path 123 * We skip checking packets on the outgoing path
129 * because the checksum is assumed to be correct. 124 * because the checksum is assumed to be correct.
130 * FIXME: Source route IP option packets --RR */ 125 * FIXME: Source route IP option packets --RR */
131 if (nf_conntrack_checksum && 126 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
132 ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
133 (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
134 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { 127 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
135 if (LOG_INVALID(IPPROTO_UDP)) 128 if (LOG_INVALID(IPPROTO_UDP))
136 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 129 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -194,7 +187,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
194 .pkt_to_tuple = udp_pkt_to_tuple, 187 .pkt_to_tuple = udp_pkt_to_tuple,
195 .invert_tuple = udp_invert_tuple, 188 .invert_tuple = udp_invert_tuple,
196 .print_tuple = udp_print_tuple, 189 .print_tuple = udp_print_tuple,
197 .print_conntrack = udp_print_conntrack,
198 .packet = udp_packet, 190 .packet = udp_packet,
199 .new = udp_new, 191 .new = udp_new,
200 .error = udp_error, 192 .error = udp_error,
@@ -222,7 +214,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
222 .pkt_to_tuple = udp_pkt_to_tuple, 214 .pkt_to_tuple = udp_pkt_to_tuple,
223 .invert_tuple = udp_invert_tuple, 215 .invert_tuple = udp_invert_tuple,
224 .print_tuple = udp_print_tuple, 216 .print_tuple = udp_print_tuple,
225 .print_conntrack = udp_print_conntrack,
226 .packet = udp_packet, 217 .packet = udp_packet,
227 .new = udp_new, 218 .new = udp_new,
228 .error = udp_error, 219 .error = udp_error,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index b8981dd922be..9dd03c7aeac6 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -22,6 +22,7 @@
22#include <linux/netfilter_ipv6.h> 22#include <linux/netfilter_ipv6.h>
23#include <net/netfilter/nf_conntrack_l4proto.h> 23#include <net/netfilter/nf_conntrack_l4proto.h>
24#include <net/netfilter/nf_conntrack_ecache.h> 24#include <net/netfilter/nf_conntrack_ecache.h>
25#include <net/netfilter/nf_log.h>
25 26
26static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ; 27static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ;
27static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ; 28static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ;
@@ -30,7 +31,8 @@ static int udplite_pkt_to_tuple(const struct sk_buff *skb,
30 unsigned int dataoff, 31 unsigned int dataoff,
31 struct nf_conntrack_tuple *tuple) 32 struct nf_conntrack_tuple *tuple)
32{ 33{
33 struct udphdr _hdr, *hp; 34 const struct udphdr *hp;
35 struct udphdr _hdr;
34 36
35 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 37 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
36 if (hp == NULL) 38 if (hp == NULL)
@@ -58,15 +60,8 @@ static int udplite_print_tuple(struct seq_file *s,
58 ntohs(tuple->dst.u.udp.port)); 60 ntohs(tuple->dst.u.udp.port));
59} 61}
60 62
61/* Print out the private part of the conntrack. */
62static int udplite_print_conntrack(struct seq_file *s,
63 const struct nf_conn *conntrack)
64{
65 return 0;
66}
67
68/* Returns verdict for packet, and may modify conntracktype */ 63/* Returns verdict for packet, and may modify conntracktype */
69static int udplite_packet(struct nf_conn *conntrack, 64static int udplite_packet(struct nf_conn *ct,
70 const struct sk_buff *skb, 65 const struct sk_buff *skb,
71 unsigned int dataoff, 66 unsigned int dataoff,
72 enum ip_conntrack_info ctinfo, 67 enum ip_conntrack_info ctinfo,
@@ -75,21 +70,20 @@ static int udplite_packet(struct nf_conn *conntrack,
75{ 70{
76 /* If we've seen traffic both ways, this is some kind of UDP 71 /* If we've seen traffic both ways, this is some kind of UDP
77 stream. Extend timeout. */ 72 stream. Extend timeout. */
78 if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { 73 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
79 nf_ct_refresh_acct(conntrack, ctinfo, skb, 74 nf_ct_refresh_acct(ct, ctinfo, skb,
80 nf_ct_udplite_timeout_stream); 75 nf_ct_udplite_timeout_stream);
81 /* Also, more likely to be important, and not a probe */ 76 /* Also, more likely to be important, and not a probe */
82 if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) 77 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
83 nf_conntrack_event_cache(IPCT_STATUS, skb); 78 nf_conntrack_event_cache(IPCT_STATUS, skb);
84 } else 79 } else
85 nf_ct_refresh_acct(conntrack, ctinfo, skb, 80 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
86 nf_ct_udplite_timeout);
87 81
88 return NF_ACCEPT; 82 return NF_ACCEPT;
89} 83}
90 84
91/* Called when a new connection for this protocol found. */ 85/* Called when a new connection for this protocol found. */
92static int udplite_new(struct nf_conn *conntrack, const struct sk_buff *skb, 86static int udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
93 unsigned int dataoff) 87 unsigned int dataoff)
94{ 88{
95 return 1; 89 return 1;
@@ -101,7 +95,8 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
101 unsigned int hooknum) 95 unsigned int hooknum)
102{ 96{
103 unsigned int udplen = skb->len - dataoff; 97 unsigned int udplen = skb->len - dataoff;
104 struct udphdr _hdr, *hdr; 98 const struct udphdr *hdr;
99 struct udphdr _hdr;
105 unsigned int cscov; 100 unsigned int cscov;
106 101
107 /* Header is too small? */ 102 /* Header is too small? */
@@ -133,8 +128,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
133 128
134 /* Checksum invalid? Ignore. */ 129 /* Checksum invalid? Ignore. */
135 if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) && 130 if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) &&
136 ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || 131 hooknum == NF_INET_PRE_ROUTING) {
137 (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))) {
138 if (pf == PF_INET) { 132 if (pf == PF_INET) {
139 struct iphdr *iph = ip_hdr(skb); 133 struct iphdr *iph = ip_hdr(skb);
140 134
@@ -198,7 +192,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
198 .pkt_to_tuple = udplite_pkt_to_tuple, 192 .pkt_to_tuple = udplite_pkt_to_tuple,
199 .invert_tuple = udplite_invert_tuple, 193 .invert_tuple = udplite_invert_tuple,
200 .print_tuple = udplite_print_tuple, 194 .print_tuple = udplite_print_tuple,
201 .print_conntrack = udplite_print_conntrack,
202 .packet = udplite_packet, 195 .packet = udplite_packet,
203 .new = udplite_new, 196 .new = udplite_new,
204 .error = udplite_error, 197 .error = udplite_error,
@@ -222,7 +215,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
222 .pkt_to_tuple = udplite_pkt_to_tuple, 215 .pkt_to_tuple = udplite_pkt_to_tuple,
223 .invert_tuple = udplite_invert_tuple, 216 .invert_tuple = udplite_invert_tuple,
224 .print_tuple = udplite_print_tuple, 217 .print_tuple = udplite_print_tuple,
225 .print_conntrack = udplite_print_conntrack,
226 .packet = udplite_packet, 218 .packet = udplite_packet,
227 .new = udplite_new, 219 .new = udplite_new,
228 .error = udplite_error, 220 .error = udplite_error,
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index b5a16c6e21c2..a70051d741a7 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -62,8 +62,9 @@ static int help(struct sk_buff *skb,
62 enum ip_conntrack_info ctinfo) 62 enum ip_conntrack_info ctinfo)
63{ 63{
64 unsigned int dataoff, datalen; 64 unsigned int dataoff, datalen;
65 struct tcphdr _tcph, *th; 65 const struct tcphdr *th;
66 char *sb_ptr; 66 struct tcphdr _tcph;
67 void *sb_ptr;
67 int ret = NF_ACCEPT; 68 int ret = NF_ACCEPT;
68 int dir = CTINFO2DIR(ctinfo); 69 int dir = CTINFO2DIR(ctinfo);
69 struct nf_ct_sane_master *ct_sane_info; 70 struct nf_ct_sane_master *ct_sane_info;
@@ -99,7 +100,7 @@ static int help(struct sk_buff *skb,
99 if (datalen != sizeof(struct sane_request)) 100 if (datalen != sizeof(struct sane_request))
100 goto out; 101 goto out;
101 102
102 req = (struct sane_request *)sb_ptr; 103 req = sb_ptr;
103 if (req->RPC_code != htonl(SANE_NET_START)) { 104 if (req->RPC_code != htonl(SANE_NET_START)) {
104 /* Not an interesting command */ 105 /* Not an interesting command */
105 ct_sane_info->state = SANE_STATE_NORMAL; 106 ct_sane_info->state = SANE_STATE_NORMAL;
@@ -123,7 +124,7 @@ static int help(struct sk_buff *skb,
123 goto out; 124 goto out;
124 } 125 }
125 126
126 reply = (struct sane_reply_net_start *)sb_ptr; 127 reply = sb_ptr;
127 if (reply->status != htonl(SANE_STATUS_SUCCESS)) { 128 if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
128 /* saned refused the command */ 129 /* saned refused the command */
129 pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", 130 pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 8f8b5a48df38..c521c891d351 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip_conntrack_sip");
28 28
29#define MAX_PORTS 8 29#define MAX_PORTS 8
30static unsigned short ports[MAX_PORTS]; 30static unsigned short ports[MAX_PORTS];
31static int ports_c; 31static unsigned int ports_c;
32module_param_array(ports, ushort, &ports_c, 0400); 32module_param_array(ports, ushort, &ports_c, 0400);
33MODULE_PARM_DESC(ports, "port numbers of SIP servers"); 33MODULE_PARM_DESC(ports, "port numbers of SIP servers");
34 34
@@ -48,10 +48,10 @@ unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
48 const char *dptr) __read_mostly; 48 const char *dptr) __read_mostly;
49EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); 49EXPORT_SYMBOL_GPL(nf_nat_sdp_hook);
50 50
51static int digits_len(struct nf_conn *, const char *, const char *, int *); 51static int digits_len(const struct nf_conn *, const char *, const char *, int *);
52static int epaddr_len(struct nf_conn *, const char *, const char *, int *); 52static int epaddr_len(const struct nf_conn *, const char *, const char *, int *);
53static int skp_digits_len(struct nf_conn *, const char *, const char *, int *); 53static int skp_digits_len(const struct nf_conn *, const char *, const char *, int *);
54static int skp_epaddr_len(struct nf_conn *, const char *, const char *, int *); 54static int skp_epaddr_len(const struct nf_conn *, const char *, const char *, int *);
55 55
56struct sip_header_nfo { 56struct sip_header_nfo {
57 const char *lname; 57 const char *lname;
@@ -61,7 +61,7 @@ struct sip_header_nfo {
61 size_t snlen; 61 size_t snlen;
62 size_t ln_strlen; 62 size_t ln_strlen;
63 int case_sensitive; 63 int case_sensitive;
64 int (*match_len)(struct nf_conn *, const char *, 64 int (*match_len)(const struct nf_conn *, const char *,
65 const char *, int *); 65 const char *, int *);
66}; 66};
67 67
@@ -187,7 +187,7 @@ static const struct sip_header_nfo ct_sip_hdrs[] = {
187 } 187 }
188}; 188};
189 189
190/* get line lenght until first CR or LF seen. */ 190/* get line length until first CR or LF seen. */
191int ct_sip_lnlen(const char *line, const char *limit) 191int ct_sip_lnlen(const char *line, const char *limit)
192{ 192{
193 const char *k = line; 193 const char *k = line;
@@ -225,7 +225,7 @@ const char *ct_sip_search(const char *needle, const char *haystack,
225} 225}
226EXPORT_SYMBOL_GPL(ct_sip_search); 226EXPORT_SYMBOL_GPL(ct_sip_search);
227 227
228static int digits_len(struct nf_conn *ct, const char *dptr, 228static int digits_len(const struct nf_conn *ct, const char *dptr,
229 const char *limit, int *shift) 229 const char *limit, int *shift)
230{ 230{
231 int len = 0; 231 int len = 0;
@@ -236,8 +236,8 @@ static int digits_len(struct nf_conn *ct, const char *dptr,
236 return len; 236 return len;
237} 237}
238 238
239/* get digits lenght, skiping blank spaces. */ 239/* get digits length, skipping blank spaces. */
240static int skp_digits_len(struct nf_conn *ct, const char *dptr, 240static int skp_digits_len(const struct nf_conn *ct, const char *dptr,
241 const char *limit, int *shift) 241 const char *limit, int *shift)
242{ 242{
243 for (; dptr <= limit && *dptr == ' '; dptr++) 243 for (; dptr <= limit && *dptr == ' '; dptr++)
@@ -246,8 +246,9 @@ static int skp_digits_len(struct nf_conn *ct, const char *dptr,
246 return digits_len(ct, dptr, limit, shift); 246 return digits_len(ct, dptr, limit, shift);
247} 247}
248 248
249static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp, 249static int parse_addr(const struct nf_conn *ct, const char *cp,
250 union nf_conntrack_address *addr, const char *limit) 250 const char **endp, union nf_inet_addr *addr,
251 const char *limit)
251{ 252{
252 const char *end; 253 const char *end;
253 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 254 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -272,10 +273,10 @@ static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
272} 273}
273 274
274/* skip ip address. returns its length. */ 275/* skip ip address. returns its length. */
275static int epaddr_len(struct nf_conn *ct, const char *dptr, 276static int epaddr_len(const struct nf_conn *ct, const char *dptr,
276 const char *limit, int *shift) 277 const char *limit, int *shift)
277{ 278{
278 union nf_conntrack_address addr; 279 union nf_inet_addr addr;
279 const char *aux = dptr; 280 const char *aux = dptr;
280 281
281 if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { 282 if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
@@ -292,7 +293,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr,
292} 293}
293 294
294/* get address length, skiping user info. */ 295/* get address length, skiping user info. */
295static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, 296static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr,
296 const char *limit, int *shift) 297 const char *limit, int *shift)
297{ 298{
298 const char *start = dptr; 299 const char *start = dptr;
@@ -319,7 +320,7 @@ static int skp_epaddr_len(struct nf_conn *ct, const char *dptr,
319} 320}
320 321
321/* Returns 0 if not found, -1 error parsing. */ 322/* Returns 0 if not found, -1 error parsing. */
322int ct_sip_get_info(struct nf_conn *ct, 323int ct_sip_get_info(const struct nf_conn *ct,
323 const char *dptr, size_t dlen, 324 const char *dptr, size_t dlen,
324 unsigned int *matchoff, 325 unsigned int *matchoff,
325 unsigned int *matchlen, 326 unsigned int *matchlen,
@@ -366,7 +367,7 @@ EXPORT_SYMBOL_GPL(ct_sip_get_info);
366static int set_expected_rtp(struct sk_buff *skb, 367static int set_expected_rtp(struct sk_buff *skb,
367 struct nf_conn *ct, 368 struct nf_conn *ct,
368 enum ip_conntrack_info ctinfo, 369 enum ip_conntrack_info ctinfo,
369 union nf_conntrack_address *addr, 370 union nf_inet_addr *addr,
370 __be16 port, 371 __be16 port,
371 const char *dptr) 372 const char *dptr)
372{ 373{
@@ -403,11 +404,11 @@ static int sip_help(struct sk_buff *skb,
403 enum ip_conntrack_info ctinfo) 404 enum ip_conntrack_info ctinfo)
404{ 405{
405 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 406 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
406 union nf_conntrack_address addr; 407 union nf_inet_addr addr;
407 unsigned int dataoff, datalen; 408 unsigned int dataoff, datalen;
408 const char *dptr; 409 const char *dptr;
409 int ret = NF_ACCEPT; 410 int ret = NF_ACCEPT;
410 int matchoff, matchlen; 411 unsigned int matchoff, matchlen;
411 u_int16_t port; 412 u_int16_t port;
412 enum sip_header_pos pos; 413 enum sip_header_pos pos;
413 typeof(nf_nat_sip_hook) nf_nat_sip; 414 typeof(nf_nat_sip_hook) nf_nat_sip;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9efdd37fc195..e88e96af613d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -31,8 +31,8 @@ MODULE_LICENSE("GPL");
31#ifdef CONFIG_PROC_FS 31#ifdef CONFIG_PROC_FS
32int 32int
33print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 33print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
34 struct nf_conntrack_l3proto *l3proto, 34 const struct nf_conntrack_l3proto *l3proto,
35 struct nf_conntrack_l4proto *l4proto) 35 const struct nf_conntrack_l4proto *l4proto)
36{ 36{
37 return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple); 37 return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple);
38} 38}
@@ -58,12 +58,14 @@ struct ct_iter_state {
58static struct hlist_node *ct_get_first(struct seq_file *seq) 58static struct hlist_node *ct_get_first(struct seq_file *seq)
59{ 59{
60 struct ct_iter_state *st = seq->private; 60 struct ct_iter_state *st = seq->private;
61 struct hlist_node *n;
61 62
62 for (st->bucket = 0; 63 for (st->bucket = 0;
63 st->bucket < nf_conntrack_htable_size; 64 st->bucket < nf_conntrack_htable_size;
64 st->bucket++) { 65 st->bucket++) {
65 if (!hlist_empty(&nf_conntrack_hash[st->bucket])) 66 n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
66 return nf_conntrack_hash[st->bucket].first; 67 if (n)
68 return n;
67 } 69 }
68 return NULL; 70 return NULL;
69} 71}
@@ -73,11 +75,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
73{ 75{
74 struct ct_iter_state *st = seq->private; 76 struct ct_iter_state *st = seq->private;
75 77
76 head = head->next; 78 head = rcu_dereference(head->next);
77 while (head == NULL) { 79 while (head == NULL) {
78 if (++st->bucket >= nf_conntrack_htable_size) 80 if (++st->bucket >= nf_conntrack_htable_size)
79 return NULL; 81 return NULL;
80 head = nf_conntrack_hash[st->bucket].first; 82 head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
81 } 83 }
82 return head; 84 return head;
83} 85}
@@ -93,8 +95,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
93} 95}
94 96
95static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 97static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
98 __acquires(RCU)
96{ 99{
97 read_lock_bh(&nf_conntrack_lock); 100 rcu_read_lock();
98 return ct_get_idx(seq, *pos); 101 return ct_get_idx(seq, *pos);
99} 102}
100 103
@@ -105,82 +108,80 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
105} 108}
106 109
107static void ct_seq_stop(struct seq_file *s, void *v) 110static void ct_seq_stop(struct seq_file *s, void *v)
111 __releases(RCU)
108{ 112{
109 read_unlock_bh(&nf_conntrack_lock); 113 rcu_read_unlock();
110} 114}
111 115
112/* return 0 on success, 1 in case of error */ 116/* return 0 on success, 1 in case of error */
113static int ct_seq_show(struct seq_file *s, void *v) 117static int ct_seq_show(struct seq_file *s, void *v)
114{ 118{
115 const struct nf_conntrack_tuple_hash *hash = v; 119 const struct nf_conntrack_tuple_hash *hash = v;
116 const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); 120 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
117 struct nf_conntrack_l3proto *l3proto; 121 const struct nf_conntrack_l3proto *l3proto;
118 struct nf_conntrack_l4proto *l4proto; 122 const struct nf_conntrack_l4proto *l4proto;
119 123
120 NF_CT_ASSERT(conntrack); 124 NF_CT_ASSERT(ct);
121 125
122 /* we only want to print DIR_ORIGINAL */ 126 /* we only want to print DIR_ORIGINAL */
123 if (NF_CT_DIRECTION(hash)) 127 if (NF_CT_DIRECTION(hash))
124 return 0; 128 return 0;
125 129
126 l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 130 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
127 .tuple.src.l3num); 131 .tuple.src.l3num);
128 132
129 NF_CT_ASSERT(l3proto); 133 NF_CT_ASSERT(l3proto);
130 l4proto = __nf_ct_l4proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 134 l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
131 .tuple.src.l3num, 135 .tuple.src.l3num,
132 conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 136 ct->tuplehash[IP_CT_DIR_ORIGINAL]
133 .tuple.dst.protonum); 137 .tuple.dst.protonum);
134 NF_CT_ASSERT(l4proto); 138 NF_CT_ASSERT(l4proto);
135 139
136 if (seq_printf(s, "%-8s %u %-8s %u %ld ", 140 if (seq_printf(s, "%-8s %u %-8s %u %ld ",
137 l3proto->name, 141 l3proto->name,
138 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, 142 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
139 l4proto->name, 143 l4proto->name,
140 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, 144 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
141 timer_pending(&conntrack->timeout) 145 timer_pending(&ct->timeout)
142 ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) 146 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
143 return -ENOSPC; 147 return -ENOSPC;
144 148
145 if (l3proto->print_conntrack(s, conntrack)) 149 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
146 return -ENOSPC; 150 return -ENOSPC;
147 151
148 if (l4proto->print_conntrack(s, conntrack)) 152 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
149 return -ENOSPC;
150
151 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
152 l3proto, l4proto)) 153 l3proto, l4proto))
153 return -ENOSPC; 154 return -ENOSPC;
154 155
155 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) 156 if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL]))
156 return -ENOSPC; 157 return -ENOSPC;
157 158
158 if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) 159 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
159 if (seq_printf(s, "[UNREPLIED] ")) 160 if (seq_printf(s, "[UNREPLIED] "))
160 return -ENOSPC; 161 return -ENOSPC;
161 162
162 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, 163 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
163 l3proto, l4proto)) 164 l3proto, l4proto))
164 return -ENOSPC; 165 return -ENOSPC;
165 166
166 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) 167 if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY]))
167 return -ENOSPC; 168 return -ENOSPC;
168 169
169 if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) 170 if (test_bit(IPS_ASSURED_BIT, &ct->status))
170 if (seq_printf(s, "[ASSURED] ")) 171 if (seq_printf(s, "[ASSURED] "))
171 return -ENOSPC; 172 return -ENOSPC;
172 173
173#if defined(CONFIG_NF_CONNTRACK_MARK) 174#if defined(CONFIG_NF_CONNTRACK_MARK)
174 if (seq_printf(s, "mark=%u ", conntrack->mark)) 175 if (seq_printf(s, "mark=%u ", ct->mark))
175 return -ENOSPC; 176 return -ENOSPC;
176#endif 177#endif
177 178
178#ifdef CONFIG_NF_CONNTRACK_SECMARK 179#ifdef CONFIG_NF_CONNTRACK_SECMARK
179 if (seq_printf(s, "secmark=%u ", conntrack->secmark)) 180 if (seq_printf(s, "secmark=%u ", ct->secmark))
180 return -ENOSPC; 181 return -ENOSPC;
181#endif 182#endif
182 183
183 if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) 184 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
184 return -ENOSPC; 185 return -ENOSPC;
185 186
186 return 0; 187 return 0;
@@ -245,7 +246,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
245static int ct_cpu_seq_show(struct seq_file *seq, void *v) 246static int ct_cpu_seq_show(struct seq_file *seq, void *v)
246{ 247{
247 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); 248 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
248 struct ip_conntrack_stat *st = v; 249 const struct ip_conntrack_stat *st = v;
249 250
250 if (v == SEQ_START_TOKEN) { 251 if (v == SEQ_START_TOKEN) {
251 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); 252 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
@@ -383,15 +384,11 @@ static ctl_table nf_ct_netfilter_table[] = {
383 { .ctl_name = 0 } 384 { .ctl_name = 0 }
384}; 385};
385 386
386static ctl_table nf_ct_net_table[] = { 387static struct ctl_path nf_ct_path[] = {
387 { 388 { .procname = "net", .ctl_name = CTL_NET, },
388 .ctl_name = CTL_NET, 389 { }
389 .procname = "net",
390 .mode = 0555,
391 .child = nf_ct_netfilter_table,
392 },
393 { .ctl_name = 0 }
394}; 390};
391
395EXPORT_SYMBOL_GPL(nf_ct_log_invalid); 392EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
396#endif /* CONFIG_SYSCTL */ 393#endif /* CONFIG_SYSCTL */
397 394
@@ -418,7 +415,8 @@ static int __init nf_conntrack_standalone_init(void)
418 proc_stat->owner = THIS_MODULE; 415 proc_stat->owner = THIS_MODULE;
419#endif 416#endif
420#ifdef CONFIG_SYSCTL 417#ifdef CONFIG_SYSCTL
421 nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table); 418 nf_ct_sysctl_header = register_sysctl_paths(nf_ct_path,
419 nf_ct_netfilter_table);
422 if (nf_ct_sysctl_header == NULL) { 420 if (nf_ct_sysctl_header == NULL) {
423 printk("nf_conntrack: can't register to sysctl.\n"); 421 printk("nf_conntrack: can't register to sysctl.\n");
424 ret = -ENOMEM; 422 ret = -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e894aa1ff3ad..bd2e800f23cc 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ip_conntrack_tftp");
25 25
26#define MAX_PORTS 8 26#define MAX_PORTS 8
27static unsigned short ports[MAX_PORTS]; 27static unsigned short ports[MAX_PORTS];
28static int ports_c; 28static unsigned int ports_c;
29module_param_array(ports, ushort, &ports_c, 0400); 29module_param_array(ports, ushort, &ports_c, 0400);
30MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); 30MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
31 31
@@ -39,7 +39,8 @@ static int tftp_help(struct sk_buff *skb,
39 struct nf_conn *ct, 39 struct nf_conn *ct,
40 enum ip_conntrack_info ctinfo) 40 enum ip_conntrack_info ctinfo)
41{ 41{
42 struct tftphdr _tftph, *tfh; 42 const struct tftphdr *tfh;
43 struct tftphdr _tftph;
43 struct nf_conntrack_expect *exp; 44 struct nf_conntrack_expect *exp;
44 struct nf_conntrack_tuple *tuple; 45 struct nf_conntrack_tuple *tuple;
45 unsigned int ret = NF_ACCEPT; 46 unsigned int ret = NF_ACCEPT;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index d67c4fbf6031..cec9976aecbf 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -6,6 +6,7 @@
6#include <linux/netfilter.h> 6#include <linux/netfilter.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <net/protocol.h> 8#include <net/protocol.h>
9#include <net/netfilter/nf_log.h>
9 10
10#include "nf_internals.h" 11#include "nf_internals.h"
11 12
@@ -14,12 +15,12 @@
14 15
15#define NF_LOG_PREFIXLEN 128 16#define NF_LOG_PREFIXLEN 128
16 17
17static struct nf_logger *nf_loggers[NPROTO]; 18static const struct nf_logger *nf_loggers[NPROTO] __read_mostly;
18static DEFINE_MUTEX(nf_log_mutex); 19static DEFINE_MUTEX(nf_log_mutex);
19 20
20/* return EBUSY if somebody else is registered, EEXIST if the same logger 21/* return EBUSY if somebody else is registered, EEXIST if the same logger
21 * is registred, 0 on success. */ 22 * is registred, 0 on success. */
22int nf_log_register(int pf, struct nf_logger *logger) 23int nf_log_register(int pf, const struct nf_logger *logger)
23{ 24{
24 int ret; 25 int ret;
25 26
@@ -57,7 +58,7 @@ void nf_log_unregister_pf(int pf)
57} 58}
58EXPORT_SYMBOL(nf_log_unregister_pf); 59EXPORT_SYMBOL(nf_log_unregister_pf);
59 60
60void nf_log_unregister(struct nf_logger *logger) 61void nf_log_unregister(const struct nf_logger *logger)
61{ 62{
62 int i; 63 int i;
63 64
@@ -77,12 +78,12 @@ void nf_log_packet(int pf,
77 const struct sk_buff *skb, 78 const struct sk_buff *skb,
78 const struct net_device *in, 79 const struct net_device *in,
79 const struct net_device *out, 80 const struct net_device *out,
80 struct nf_loginfo *loginfo, 81 const struct nf_loginfo *loginfo,
81 const char *fmt, ...) 82 const char *fmt, ...)
82{ 83{
83 va_list args; 84 va_list args;
84 char prefix[NF_LOG_PREFIXLEN]; 85 char prefix[NF_LOG_PREFIXLEN];
85 struct nf_logger *logger; 86 const struct nf_logger *logger;
86 87
87 rcu_read_lock(); 88 rcu_read_lock();
88 logger = rcu_dereference(nf_loggers[pf]); 89 logger = rcu_dereference(nf_loggers[pf]);
@@ -90,7 +91,6 @@ void nf_log_packet(int pf,
90 va_start(args, fmt); 91 va_start(args, fmt);
91 vsnprintf(prefix, sizeof(prefix), fmt, args); 92 vsnprintf(prefix, sizeof(prefix), fmt, args);
92 va_end(args); 93 va_end(args);
93 /* We must read logging before nf_logfn[pf] */
94 logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); 94 logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
95 } else if (net_ratelimit()) { 95 } else if (net_ratelimit()) {
96 printk(KERN_WARNING "nf_log_packet: can\'t log since " 96 printk(KERN_WARNING "nf_log_packet: can\'t log since "
@@ -103,6 +103,7 @@ EXPORT_SYMBOL(nf_log_packet);
103 103
104#ifdef CONFIG_PROC_FS 104#ifdef CONFIG_PROC_FS
105static void *seq_start(struct seq_file *seq, loff_t *pos) 105static void *seq_start(struct seq_file *seq, loff_t *pos)
106 __acquires(RCU)
106{ 107{
107 rcu_read_lock(); 108 rcu_read_lock();
108 109
@@ -123,6 +124,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
123} 124}
124 125
125static void seq_stop(struct seq_file *s, void *v) 126static void seq_stop(struct seq_file *s, void *v)
127 __releases(RCU)
126{ 128{
127 rcu_read_unlock(); 129 rcu_read_unlock();
128} 130}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0cef1433d660..bfc2928c1912 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -7,6 +7,7 @@
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/rcupdate.h> 8#include <linux/rcupdate.h>
9#include <net/protocol.h> 9#include <net/protocol.h>
10#include <net/netfilter/nf_queue.h>
10 11
11#include "nf_internals.h" 12#include "nf_internals.h"
12 13
@@ -15,13 +16,13 @@
15 * long term mutex. The handler must provide an an outfn() to accept packets 16 * long term mutex. The handler must provide an an outfn() to accept packets
16 * for queueing and must reinject all packets it receives, no matter what. 17 * for queueing and must reinject all packets it receives, no matter what.
17 */ 18 */
18static struct nf_queue_handler *queue_handler[NPROTO]; 19static const struct nf_queue_handler *queue_handler[NPROTO];
19 20
20static DEFINE_MUTEX(queue_handler_mutex); 21static DEFINE_MUTEX(queue_handler_mutex);
21 22
22/* return EBUSY when somebody else is registered, return EEXIST if the 23/* return EBUSY when somebody else is registered, return EEXIST if the
23 * same handler is registered, return 0 in case of success. */ 24 * same handler is registered, return 0 in case of success. */
24int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) 25int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
25{ 26{
26 int ret; 27 int ret;
27 28
@@ -44,7 +45,7 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
44EXPORT_SYMBOL(nf_register_queue_handler); 45EXPORT_SYMBOL(nf_register_queue_handler);
45 46
46/* The caller must flush their queue before this */ 47/* The caller must flush their queue before this */
47int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh) 48int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
48{ 49{
49 if (pf >= NPROTO) 50 if (pf >= NPROTO)
50 return -EINVAL; 51 return -EINVAL;
@@ -64,7 +65,7 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
64} 65}
65EXPORT_SYMBOL(nf_unregister_queue_handler); 66EXPORT_SYMBOL(nf_unregister_queue_handler);
66 67
67void nf_unregister_queue_handlers(struct nf_queue_handler *qh) 68void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
68{ 69{
69 int pf; 70 int pf;
70 71
@@ -79,6 +80,27 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
79} 80}
80EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); 81EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
81 82
83static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
84{
85 /* Release those devices we held, or Alexey will kill me. */
86 if (entry->indev)
87 dev_put(entry->indev);
88 if (entry->outdev)
89 dev_put(entry->outdev);
90#ifdef CONFIG_BRIDGE_NETFILTER
91 if (entry->skb->nf_bridge) {
92 struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
93
94 if (nf_bridge->physindev)
95 dev_put(nf_bridge->physindev);
96 if (nf_bridge->physoutdev)
97 dev_put(nf_bridge->physoutdev);
98 }
99#endif
100 /* Drop reference to owner of hook which queued us. */
101 module_put(entry->elem->owner);
102}
103
82/* 104/*
83 * Any packet that leaves via this function must come back 105 * Any packet that leaves via this function must come back
84 * through nf_reinject(). 106 * through nf_reinject().
@@ -92,84 +114,79 @@ static int __nf_queue(struct sk_buff *skb,
92 unsigned int queuenum) 114 unsigned int queuenum)
93{ 115{
94 int status; 116 int status;
95 struct nf_info *info; 117 struct nf_queue_entry *entry = NULL;
96#ifdef CONFIG_BRIDGE_NETFILTER 118#ifdef CONFIG_BRIDGE_NETFILTER
97 struct net_device *physindev = NULL; 119 struct net_device *physindev;
98 struct net_device *physoutdev = NULL; 120 struct net_device *physoutdev;
99#endif 121#endif
100 struct nf_afinfo *afinfo; 122 const struct nf_afinfo *afinfo;
101 struct nf_queue_handler *qh; 123 const struct nf_queue_handler *qh;
102 124
103 /* QUEUE == DROP if noone is waiting, to be safe. */ 125 /* QUEUE == DROP if noone is waiting, to be safe. */
104 rcu_read_lock(); 126 rcu_read_lock();
105 127
106 qh = rcu_dereference(queue_handler[pf]); 128 qh = rcu_dereference(queue_handler[pf]);
107 if (!qh) { 129 if (!qh)
108 rcu_read_unlock(); 130 goto err_unlock;
109 kfree_skb(skb);
110 return 1;
111 }
112 131
113 afinfo = nf_get_afinfo(pf); 132 afinfo = nf_get_afinfo(pf);
114 if (!afinfo) { 133 if (!afinfo)
115 rcu_read_unlock(); 134 goto err_unlock;
116 kfree_skb(skb); 135
117 return 1; 136 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
118 } 137 if (!entry)
119 138 goto err_unlock;
120 info = kmalloc(sizeof(*info) + afinfo->route_key_size, GFP_ATOMIC); 139
121 if (!info) { 140 *entry = (struct nf_queue_entry) {
122 if (net_ratelimit()) 141 .skb = skb,
123 printk(KERN_ERR "OOM queueing packet %p\n", 142 .elem = list_entry(elem, struct nf_hook_ops, list),
124 skb); 143 .pf = pf,
125 rcu_read_unlock(); 144 .hook = hook,
126 kfree_skb(skb); 145 .indev = indev,
127 return 1; 146 .outdev = outdev,
128 } 147 .okfn = okfn,
129 148 };
130 *info = (struct nf_info) {
131 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
132 149
133 /* If it's going away, ignore hook. */ 150 /* If it's going away, ignore hook. */
134 if (!try_module_get(info->elem->owner)) { 151 if (!try_module_get(entry->elem->owner)) {
135 rcu_read_unlock(); 152 rcu_read_unlock();
136 kfree(info); 153 kfree(entry);
137 return 0; 154 return 0;
138 } 155 }
139 156
140 /* Bump dev refs so they don't vanish while packet is out */ 157 /* Bump dev refs so they don't vanish while packet is out */
141 if (indev) dev_hold(indev); 158 if (indev)
142 if (outdev) dev_hold(outdev); 159 dev_hold(indev);
143 160 if (outdev)
161 dev_hold(outdev);
144#ifdef CONFIG_BRIDGE_NETFILTER 162#ifdef CONFIG_BRIDGE_NETFILTER
145 if (skb->nf_bridge) { 163 if (skb->nf_bridge) {
146 physindev = skb->nf_bridge->physindev; 164 physindev = skb->nf_bridge->physindev;
147 if (physindev) dev_hold(physindev); 165 if (physindev)
166 dev_hold(physindev);
148 physoutdev = skb->nf_bridge->physoutdev; 167 physoutdev = skb->nf_bridge->physoutdev;
149 if (physoutdev) dev_hold(physoutdev); 168 if (physoutdev)
169 dev_hold(physoutdev);
150 } 170 }
151#endif 171#endif
152 afinfo->saveroute(skb, info); 172 afinfo->saveroute(skb, entry);
153 status = qh->outfn(skb, info, queuenum, qh->data); 173 status = qh->outfn(entry, queuenum);
154 174
155 rcu_read_unlock(); 175 rcu_read_unlock();
156 176
157 if (status < 0) { 177 if (status < 0) {
158 /* James M doesn't say fuck enough. */ 178 nf_queue_entry_release_refs(entry);
159 if (indev) dev_put(indev); 179 goto err;
160 if (outdev) dev_put(outdev);
161#ifdef CONFIG_BRIDGE_NETFILTER
162 if (physindev) dev_put(physindev);
163 if (physoutdev) dev_put(physoutdev);
164#endif
165 module_put(info->elem->owner);
166 kfree(info);
167 kfree_skb(skb);
168
169 return 1;
170 } 180 }
171 181
172 return 1; 182 return 1;
183
184err_unlock:
185 rcu_read_unlock();
186err:
187 kfree_skb(skb);
188 kfree(entry);
189 return 1;
173} 190}
174 191
175int nf_queue(struct sk_buff *skb, 192int nf_queue(struct sk_buff *skb,
@@ -212,41 +229,15 @@ int nf_queue(struct sk_buff *skb,
212 return 1; 229 return 1;
213} 230}
214 231
215void nf_reinject(struct sk_buff *skb, struct nf_info *info, 232void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
216 unsigned int verdict)
217{ 233{
218 struct list_head *elem = &info->elem->list; 234 struct sk_buff *skb = entry->skb;
219 struct list_head *i; 235 struct list_head *elem = &entry->elem->list;
220 struct nf_afinfo *afinfo; 236 const struct nf_afinfo *afinfo;
221 237
222 rcu_read_lock(); 238 rcu_read_lock();
223 239
224 /* Release those devices we held, or Alexey will kill me. */ 240 nf_queue_entry_release_refs(entry);
225 if (info->indev) dev_put(info->indev);
226 if (info->outdev) dev_put(info->outdev);
227#ifdef CONFIG_BRIDGE_NETFILTER
228 if (skb->nf_bridge) {
229 if (skb->nf_bridge->physindev)
230 dev_put(skb->nf_bridge->physindev);
231 if (skb->nf_bridge->physoutdev)
232 dev_put(skb->nf_bridge->physoutdev);
233 }
234#endif
235
236 /* Drop reference to owner of hook which queued us. */
237 module_put(info->elem->owner);
238
239 list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
240 if (i == elem)
241 break;
242 }
243
244 if (i == &nf_hooks[info->pf][info->hook]) {
245 /* The module which sent it to userspace is gone. */
246 NFDEBUG("%s: module disappeared, dropping packet.\n",
247 __FUNCTION__);
248 verdict = NF_DROP;
249 }
250 241
251 /* Continue traversal iff userspace said ok... */ 242 /* Continue traversal iff userspace said ok... */
252 if (verdict == NF_REPEAT) { 243 if (verdict == NF_REPEAT) {
@@ -255,28 +246,30 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
255 } 246 }
256 247
257 if (verdict == NF_ACCEPT) { 248 if (verdict == NF_ACCEPT) {
258 afinfo = nf_get_afinfo(info->pf); 249 afinfo = nf_get_afinfo(entry->pf);
259 if (!afinfo || afinfo->reroute(skb, info) < 0) 250 if (!afinfo || afinfo->reroute(skb, entry) < 0)
260 verdict = NF_DROP; 251 verdict = NF_DROP;
261 } 252 }
262 253
263 if (verdict == NF_ACCEPT) { 254 if (verdict == NF_ACCEPT) {
264 next_hook: 255 next_hook:
265 verdict = nf_iterate(&nf_hooks[info->pf][info->hook], 256 verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
266 skb, info->hook, 257 skb, entry->hook,
267 info->indev, info->outdev, &elem, 258 entry->indev, entry->outdev, &elem,
268 info->okfn, INT_MIN); 259 entry->okfn, INT_MIN);
269 } 260 }
270 261
271 switch (verdict & NF_VERDICT_MASK) { 262 switch (verdict & NF_VERDICT_MASK) {
272 case NF_ACCEPT: 263 case NF_ACCEPT:
273 case NF_STOP: 264 case NF_STOP:
274 info->okfn(skb); 265 local_bh_disable();
266 entry->okfn(skb);
267 local_bh_enable();
275 case NF_STOLEN: 268 case NF_STOLEN:
276 break; 269 break;
277 case NF_QUEUE: 270 case NF_QUEUE:
278 if (!__nf_queue(skb, elem, info->pf, info->hook, 271 if (!__nf_queue(skb, elem, entry->pf, entry->hook,
279 info->indev, info->outdev, info->okfn, 272 entry->indev, entry->outdev, entry->okfn,
280 verdict >> NF_VERDICT_BITS)) 273 verdict >> NF_VERDICT_BITS))
281 goto next_hook; 274 goto next_hook;
282 break; 275 break;
@@ -284,7 +277,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
284 kfree_skb(skb); 277 kfree_skb(skb);
285 } 278 }
286 rcu_read_unlock(); 279 rcu_read_unlock();
287 kfree(info); 280 kfree(entry);
288 return; 281 return;
289} 282}
290EXPORT_SYMBOL(nf_reinject); 283EXPORT_SYMBOL(nf_reinject);
@@ -317,7 +310,7 @@ static int seq_show(struct seq_file *s, void *v)
317{ 310{
318 int ret; 311 int ret;
319 loff_t *pos = v; 312 loff_t *pos = v;
320 struct nf_queue_handler *qh; 313 const struct nf_queue_handler *qh;
321 314
322 rcu_read_lock(); 315 rcu_read_lock();
323 qh = rcu_dereference(queue_handler[*pos]); 316 qh = rcu_dereference(queue_handler[*pos]);
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index aa2831587b82..3dd4b3c76d81 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -23,14 +23,13 @@ static inline int overlap(int min1, int max1, int min2, int max2)
23/* Functions to register sockopt ranges (exclusive). */ 23/* Functions to register sockopt ranges (exclusive). */
24int nf_register_sockopt(struct nf_sockopt_ops *reg) 24int nf_register_sockopt(struct nf_sockopt_ops *reg)
25{ 25{
26 struct list_head *i; 26 struct nf_sockopt_ops *ops;
27 int ret = 0; 27 int ret = 0;
28 28
29 if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) 29 if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
30 return -EINTR; 30 return -EINTR;
31 31
32 list_for_each(i, &nf_sockopts) { 32 list_for_each_entry(ops, &nf_sockopts, list) {
33 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
34 if (ops->pf == reg->pf 33 if (ops->pf == reg->pf
35 && (overlap(ops->set_optmin, ops->set_optmax, 34 && (overlap(ops->set_optmin, ops->set_optmax,
36 reg->set_optmin, reg->set_optmax) 35 reg->set_optmin, reg->set_optmax)
@@ -61,48 +60,57 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
61} 60}
62EXPORT_SYMBOL(nf_unregister_sockopt); 61EXPORT_SYMBOL(nf_unregister_sockopt);
63 62
64/* Call get/setsockopt() */ 63static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf,
65static int nf_sockopt(struct sock *sk, int pf, int val, 64 int val, int get)
66 char __user *opt, int *len, int get)
67{ 65{
68 struct list_head *i;
69 struct nf_sockopt_ops *ops; 66 struct nf_sockopt_ops *ops;
70 int ret;
71 67
72 if (sk->sk_net != &init_net) 68 if (sk->sk_net != &init_net)
73 return -ENOPROTOOPT; 69 return ERR_PTR(-ENOPROTOOPT);
74 70
75 if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) 71 if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
76 return -EINTR; 72 return ERR_PTR(-EINTR);
77 73
78 list_for_each(i, &nf_sockopts) { 74 list_for_each_entry(ops, &nf_sockopts, list) {
79 ops = (struct nf_sockopt_ops *)i;
80 if (ops->pf == pf) { 75 if (ops->pf == pf) {
81 if (!try_module_get(ops->owner)) 76 if (!try_module_get(ops->owner))
82 goto out_nosup; 77 goto out_nosup;
78
83 if (get) { 79 if (get) {
84 if (val >= ops->get_optmin 80 if (val >= ops->get_optmin &&
85 && val < ops->get_optmax) { 81 val < ops->get_optmax)
86 mutex_unlock(&nf_sockopt_mutex);
87 ret = ops->get(sk, val, opt, len);
88 goto out; 82 goto out;
89 }
90 } else { 83 } else {
91 if (val >= ops->set_optmin 84 if (val >= ops->set_optmin &&
92 && val < ops->set_optmax) { 85 val < ops->set_optmax)
93 mutex_unlock(&nf_sockopt_mutex);
94 ret = ops->set(sk, val, opt, *len);
95 goto out; 86 goto out;
96 }
97 } 87 }
98 module_put(ops->owner); 88 module_put(ops->owner);
99 } 89 }
100 } 90 }
101 out_nosup: 91out_nosup:
92 ops = ERR_PTR(-ENOPROTOOPT);
93out:
102 mutex_unlock(&nf_sockopt_mutex); 94 mutex_unlock(&nf_sockopt_mutex);
103 return -ENOPROTOOPT; 95 return ops;
96}
97
98/* Call get/setsockopt() */
99static int nf_sockopt(struct sock *sk, int pf, int val,
100 char __user *opt, int *len, int get)
101{
102 struct nf_sockopt_ops *ops;
103 int ret;
104
105 ops = nf_sockopt_find(sk, pf, val, get);
106 if (IS_ERR(ops))
107 return PTR_ERR(ops);
108
109 if (get)
110 ret = ops->get(sk, val, opt, len);
111 else
112 ret = ops->set(sk, val, opt, *len);
104 113
105 out:
106 module_put(ops->owner); 114 module_put(ops->owner);
107 return ret; 115 return ret;
108} 116}
@@ -124,56 +132,25 @@ EXPORT_SYMBOL(nf_getsockopt);
124static int compat_nf_sockopt(struct sock *sk, int pf, int val, 132static int compat_nf_sockopt(struct sock *sk, int pf, int val,
125 char __user *opt, int *len, int get) 133 char __user *opt, int *len, int get)
126{ 134{
127 struct list_head *i;
128 struct nf_sockopt_ops *ops; 135 struct nf_sockopt_ops *ops;
129 int ret; 136 int ret;
130 137
131 if (sk->sk_net != &init_net) 138 ops = nf_sockopt_find(sk, pf, val, get);
132 return -ENOPROTOOPT; 139 if (IS_ERR(ops))
133 140 return PTR_ERR(ops);
134 141
135 if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) 142 if (get) {
136 return -EINTR; 143 if (ops->compat_get)
137 144 ret = ops->compat_get(sk, val, opt, len);
138 list_for_each(i, &nf_sockopts) { 145 else
139 ops = (struct nf_sockopt_ops *)i; 146 ret = ops->get(sk, val, opt, len);
140 if (ops->pf == pf) { 147 } else {
141 if (!try_module_get(ops->owner)) 148 if (ops->compat_set)
142 goto out_nosup; 149 ret = ops->compat_set(sk, val, opt, *len);
143 150 else
144 if (get) { 151 ret = ops->set(sk, val, opt, *len);
145 if (val >= ops->get_optmin
146 && val < ops->get_optmax) {
147 mutex_unlock(&nf_sockopt_mutex);
148 if (ops->compat_get)
149 ret = ops->compat_get(sk,
150 val, opt, len);
151 else
152 ret = ops->get(sk,
153 val, opt, len);
154 goto out;
155 }
156 } else {
157 if (val >= ops->set_optmin
158 && val < ops->set_optmax) {
159 mutex_unlock(&nf_sockopt_mutex);
160 if (ops->compat_set)
161 ret = ops->compat_set(sk,
162 val, opt, *len);
163 else
164 ret = ops->set(sk,
165 val, opt, *len);
166 goto out;
167 }
168 }
169 module_put(ops->owner);
170 }
171 } 152 }
172 out_nosup:
173 mutex_unlock(&nf_sockopt_mutex);
174 return -ENOPROTOOPT;
175 153
176 out:
177 module_put(ops->owner); 154 module_put(ops->owner);
178 return ret; 155 return ret;
179} 156}
diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c
deleted file mode 100644
index ee34589e48a4..000000000000
--- a/net/netfilter/nf_sysctl.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/* nf_sysctl.c netfilter sysctl registration/unregistation
2 *
3 * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
4 */
5#include <linux/module.h>
6#include <linux/sysctl.h>
7#include <linux/string.h>
8#include <linux/slab.h>
9
10static void
11path_free(struct ctl_table *path, struct ctl_table *table)
12{
13 struct ctl_table *t, *next;
14
15 for (t = path; t != NULL && t != table; t = next) {
16 next = t->child;
17 kfree(t);
18 }
19}
20
21static struct ctl_table *
22path_dup(struct ctl_table *path, struct ctl_table *table)
23{
24 struct ctl_table *t, *last = NULL, *tmp;
25
26 for (t = path; t != NULL; t = t->child) {
27 /* twice the size since path elements are terminated by an
28 * empty element */
29 tmp = kmemdup(t, 2 * sizeof(*t), GFP_KERNEL);
30 if (tmp == NULL) {
31 if (last != NULL)
32 path_free(path, table);
33 return NULL;
34 }
35
36 if (last != NULL)
37 last->child = tmp;
38 else
39 path = tmp;
40 last = tmp;
41 }
42
43 if (last != NULL)
44 last->child = table;
45 else
46 path = table;
47
48 return path;
49}
50
51struct ctl_table_header *
52nf_register_sysctl_table(struct ctl_table *path, struct ctl_table *table)
53{
54 struct ctl_table_header *header;
55
56 path = path_dup(path, table);
57 if (path == NULL)
58 return NULL;
59 header = register_sysctl_table(path);
60 if (header == NULL)
61 path_free(path, table);
62 return header;
63}
64EXPORT_SYMBOL_GPL(nf_register_sysctl_table);
65
66void
67nf_unregister_sysctl_table(struct ctl_table_header *header,
68 struct ctl_table *table)
69{
70 struct ctl_table *path = header->ctl_table;
71
72 unregister_sysctl_table(header);
73 path_free(path, table);
74}
75EXPORT_SYMBOL_GPL(nf_unregister_sysctl_table);
76
77/* net/netfilter */
78static struct ctl_table nf_net_netfilter_table[] = {
79 {
80 .ctl_name = NET_NETFILTER,
81 .procname = "netfilter",
82 .mode = 0555,
83 },
84 {
85 .ctl_name = 0
86 }
87};
88struct ctl_table nf_net_netfilter_sysctl_path[] = {
89 {
90 .ctl_name = CTL_NET,
91 .procname = "net",
92 .mode = 0555,
93 .child = nf_net_netfilter_table,
94 },
95 {
96 .ctl_name = 0
97 }
98};
99EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
100
101/* net/ipv4/netfilter */
102static struct ctl_table nf_net_ipv4_netfilter_table[] = {
103 {
104 .ctl_name = NET_IPV4_NETFILTER,
105 .procname = "netfilter",
106 .mode = 0555,
107 },
108 {
109 .ctl_name = 0
110 }
111};
112static struct ctl_table nf_net_ipv4_table[] = {
113 {
114 .ctl_name = NET_IPV4,
115 .procname = "ipv4",
116 .mode = 0555,
117 .child = nf_net_ipv4_netfilter_table,
118 },
119 {
120 .ctl_name = 0
121 }
122};
123struct ctl_table nf_net_ipv4_netfilter_sysctl_path[] = {
124 {
125 .ctl_name = CTL_NET,
126 .procname = "net",
127 .mode = 0555,
128 .child = nf_net_ipv4_table,
129 },
130 {
131 .ctl_name = 0
132 }
133};
134EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2128542995f7..b75c9c4a995d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -179,7 +179,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
179static void __exit nfnetlink_exit(void) 179static void __exit nfnetlink_exit(void)
180{ 180{
181 printk("Removing netfilter NETLINK layer.\n"); 181 printk("Removing netfilter NETLINK layer.\n");
182 sock_release(nfnl->sk_socket); 182 netlink_kernel_release(nfnl);
183 return; 183 return;
184} 184}
185 185
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 2c7bd2eb0294..7efa40d47393 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -29,6 +29,7 @@
29#include <linux/jhash.h> 29#include <linux/jhash.h>
30#include <linux/random.h> 30#include <linux/random.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <net/netfilter/nf_log.h>
32 33
33#include <asm/atomic.h> 34#include <asm/atomic.h>
34 35
@@ -44,14 +45,6 @@
44#define PRINTR(x, args...) do { if (net_ratelimit()) \ 45#define PRINTR(x, args...) do { if (net_ratelimit()) \
45 printk(x, ## args); } while (0); 46 printk(x, ## args); } while (0);
46 47
47#if 0
48#define UDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \
49 __FILE__, __LINE__, __FUNCTION__, \
50 ## args)
51#else
52#define UDEBUG(x, ...)
53#endif
54
55struct nfulnl_instance { 48struct nfulnl_instance {
56 struct hlist_node hlist; /* global list of instances */ 49 struct hlist_node hlist; /* global list of instances */
57 spinlock_t lock; 50 spinlock_t lock;
@@ -92,8 +85,6 @@ __instance_lookup(u_int16_t group_num)
92 struct hlist_node *pos; 85 struct hlist_node *pos;
93 struct nfulnl_instance *inst; 86 struct nfulnl_instance *inst;
94 87
95 UDEBUG("entering (group_num=%u)\n", group_num);
96
97 head = &instance_table[instance_hashfn(group_num)]; 88 head = &instance_table[instance_hashfn(group_num)];
98 hlist_for_each_entry(inst, pos, head, hlist) { 89 hlist_for_each_entry(inst, pos, head, hlist) {
99 if (inst->group_num == group_num) 90 if (inst->group_num == group_num)
@@ -126,7 +117,6 @@ static void
126instance_put(struct nfulnl_instance *inst) 117instance_put(struct nfulnl_instance *inst)
127{ 118{
128 if (inst && atomic_dec_and_test(&inst->use)) { 119 if (inst && atomic_dec_and_test(&inst->use)) {
129 UDEBUG("kfree(inst=%p)\n", inst);
130 kfree(inst); 120 kfree(inst);
131 module_put(THIS_MODULE); 121 module_put(THIS_MODULE);
132 } 122 }
@@ -138,23 +128,23 @@ static struct nfulnl_instance *
138instance_create(u_int16_t group_num, int pid) 128instance_create(u_int16_t group_num, int pid)
139{ 129{
140 struct nfulnl_instance *inst; 130 struct nfulnl_instance *inst;
141 131 int err;
142 UDEBUG("entering (group_num=%u, pid=%d)\n", group_num,
143 pid);
144 132
145 write_lock_bh(&instances_lock); 133 write_lock_bh(&instances_lock);
146 if (__instance_lookup(group_num)) { 134 if (__instance_lookup(group_num)) {
147 inst = NULL; 135 err = -EEXIST;
148 UDEBUG("aborting, instance already exists\n");
149 goto out_unlock; 136 goto out_unlock;
150 } 137 }
151 138
152 inst = kzalloc(sizeof(*inst), GFP_ATOMIC); 139 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
153 if (!inst) 140 if (!inst) {
141 err = -ENOMEM;
154 goto out_unlock; 142 goto out_unlock;
143 }
155 144
156 if (!try_module_get(THIS_MODULE)) { 145 if (!try_module_get(THIS_MODULE)) {
157 kfree(inst); 146 kfree(inst);
147 err = -EAGAIN;
158 goto out_unlock; 148 goto out_unlock;
159 } 149 }
160 150
@@ -177,16 +167,13 @@ instance_create(u_int16_t group_num, int pid)
177 hlist_add_head(&inst->hlist, 167 hlist_add_head(&inst->hlist,
178 &instance_table[instance_hashfn(group_num)]); 168 &instance_table[instance_hashfn(group_num)]);
179 169
180 UDEBUG("newly added node: %p, next=%p\n", &inst->hlist,
181 inst->hlist.next);
182
183 write_unlock_bh(&instances_lock); 170 write_unlock_bh(&instances_lock);
184 171
185 return inst; 172 return inst;
186 173
187out_unlock: 174out_unlock:
188 write_unlock_bh(&instances_lock); 175 write_unlock_bh(&instances_lock);
189 return NULL; 176 return ERR_PTR(err);
190} 177}
191 178
192static void __nfulnl_flush(struct nfulnl_instance *inst); 179static void __nfulnl_flush(struct nfulnl_instance *inst);
@@ -195,9 +182,6 @@ static void
195__instance_destroy(struct nfulnl_instance *inst) 182__instance_destroy(struct nfulnl_instance *inst)
196{ 183{
197 /* first pull it out of the global list */ 184 /* first pull it out of the global list */
198 UDEBUG("removing instance %p (queuenum=%u) from hash\n",
199 inst, inst->group_num);
200
201 hlist_del(&inst->hlist); 185 hlist_del(&inst->hlist);
202 186
203 /* then flush all pending packets from skb */ 187 /* then flush all pending packets from skb */
@@ -305,8 +289,6 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
305 struct sk_buff *skb; 289 struct sk_buff *skb;
306 unsigned int n; 290 unsigned int n;
307 291
308 UDEBUG("entered (%u, %u)\n", inst_size, pkt_size);
309
310 /* alloc skb which should be big enough for a whole multipart 292 /* alloc skb which should be big enough for a whole multipart
311 * message. WARNING: has to be <= 128k due to slab restrictions */ 293 * message. WARNING: has to be <= 128k due to slab restrictions */
312 294
@@ -341,10 +323,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
341 sizeof(struct nfgenmsg)); 323 sizeof(struct nfgenmsg));
342 324
343 status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); 325 status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
344 if (status < 0) {
345 UDEBUG("netlink_unicast() failed\n");
346 /* FIXME: statistics */
347 }
348 326
349 inst->qlen = 0; 327 inst->qlen = 0;
350 inst->skb = NULL; 328 inst->skb = NULL;
@@ -368,8 +346,6 @@ nfulnl_timer(unsigned long data)
368{ 346{
369 struct nfulnl_instance *inst = (struct nfulnl_instance *)data; 347 struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
370 348
371 UDEBUG("timer function called, flushing buffer\n");
372
373 spin_lock_bh(&inst->lock); 349 spin_lock_bh(&inst->lock);
374 if (inst->skb) 350 if (inst->skb)
375 __nfulnl_send(inst); 351 __nfulnl_send(inst);
@@ -396,8 +372,6 @@ __build_packet_message(struct nfulnl_instance *inst,
396 __be32 tmp_uint; 372 __be32 tmp_uint;
397 sk_buff_data_t old_tail = inst->skb->tail; 373 sk_buff_data_t old_tail = inst->skb->tail;
398 374
399 UDEBUG("entered\n");
400
401 nlh = NLMSG_PUT(inst->skb, 0, 0, 375 nlh = NLMSG_PUT(inst->skb, 0, 0,
402 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, 376 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
403 sizeof(struct nfgenmsg)); 377 sizeof(struct nfgenmsg));
@@ -415,32 +389,27 @@ __build_packet_message(struct nfulnl_instance *inst,
415 NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); 389 NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix);
416 390
417 if (indev) { 391 if (indev) {
418 tmp_uint = htonl(indev->ifindex);
419#ifndef CONFIG_BRIDGE_NETFILTER 392#ifndef CONFIG_BRIDGE_NETFILTER
420 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), 393 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
421 &tmp_uint); 394 htonl(indev->ifindex));
422#else 395#else
423 if (pf == PF_BRIDGE) { 396 if (pf == PF_BRIDGE) {
424 /* Case 1: outdev is physical input device, we need to 397 /* Case 1: outdev is physical input device, we need to
425 * look for bridge group (when called from 398 * look for bridge group (when called from
426 * netfilter_bridge) */ 399 * netfilter_bridge) */
427 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, 400 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
428 sizeof(tmp_uint), &tmp_uint); 401 htonl(indev->ifindex));
429 /* this is the bridge group "brX" */ 402 /* this is the bridge group "brX" */
430 tmp_uint = htonl(indev->br_port->br->dev->ifindex); 403 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
431 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, 404 htonl(indev->br_port->br->dev->ifindex));
432 sizeof(tmp_uint), &tmp_uint);
433 } else { 405 } else {
434 /* Case 2: indev is bridge group, we need to look for 406 /* Case 2: indev is bridge group, we need to look for
435 * physical device (when called from ipv4) */ 407 * physical device (when called from ipv4) */
436 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, 408 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
437 sizeof(tmp_uint), &tmp_uint); 409 htonl(indev->ifindex));
438 if (skb->nf_bridge && skb->nf_bridge->physindev) { 410 if (skb->nf_bridge && skb->nf_bridge->physindev)
439 tmp_uint = 411 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
440 htonl(skb->nf_bridge->physindev->ifindex); 412 htonl(skb->nf_bridge->physindev->ifindex));
441 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
442 sizeof(tmp_uint), &tmp_uint);
443 }
444 } 413 }
445#endif 414#endif
446 } 415 }
@@ -448,38 +417,32 @@ __build_packet_message(struct nfulnl_instance *inst,
448 if (outdev) { 417 if (outdev) {
449 tmp_uint = htonl(outdev->ifindex); 418 tmp_uint = htonl(outdev->ifindex);
450#ifndef CONFIG_BRIDGE_NETFILTER 419#ifndef CONFIG_BRIDGE_NETFILTER
451 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), 420 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
452 &tmp_uint); 421 htonl(outdev->ifindex));
453#else 422#else
454 if (pf == PF_BRIDGE) { 423 if (pf == PF_BRIDGE) {
455 /* Case 1: outdev is physical output device, we need to 424 /* Case 1: outdev is physical output device, we need to
456 * look for bridge group (when called from 425 * look for bridge group (when called from
457 * netfilter_bridge) */ 426 * netfilter_bridge) */
458 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, 427 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
459 sizeof(tmp_uint), &tmp_uint); 428 htonl(outdev->ifindex));
460 /* this is the bridge group "brX" */ 429 /* this is the bridge group "brX" */
461 tmp_uint = htonl(outdev->br_port->br->dev->ifindex); 430 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
462 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, 431 htonl(outdev->br_port->br->dev->ifindex));
463 sizeof(tmp_uint), &tmp_uint);
464 } else { 432 } else {
465 /* Case 2: indev is a bridge group, we need to look 433 /* Case 2: indev is a bridge group, we need to look
466 * for physical device (when called from ipv4) */ 434 * for physical device (when called from ipv4) */
467 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, 435 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
468 sizeof(tmp_uint), &tmp_uint); 436 htonl(outdev->ifindex));
469 if (skb->nf_bridge && skb->nf_bridge->physoutdev) { 437 if (skb->nf_bridge && skb->nf_bridge->physoutdev)
470 tmp_uint = 438 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
471 htonl(skb->nf_bridge->physoutdev->ifindex); 439 htonl(skb->nf_bridge->physoutdev->ifindex));
472 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
473 sizeof(tmp_uint), &tmp_uint);
474 }
475 } 440 }
476#endif 441#endif
477 } 442 }
478 443
479 if (skb->mark) { 444 if (skb->mark)
480 tmp_uint = htonl(skb->mark); 445 NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark));
481 NLA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
482 }
483 446
484 if (indev && skb->dev) { 447 if (indev && skb->dev) {
485 struct nfulnl_msg_packet_hw phw; 448 struct nfulnl_msg_packet_hw phw;
@@ -504,23 +467,23 @@ __build_packet_message(struct nfulnl_instance *inst,
504 read_lock_bh(&skb->sk->sk_callback_lock); 467 read_lock_bh(&skb->sk->sk_callback_lock);
505 if (skb->sk->sk_socket && skb->sk->sk_socket->file) { 468 if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
506 __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); 469 __be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
470 __be32 gid = htonl(skb->sk->sk_socket->file->f_gid);
507 /* need to unlock here since NLA_PUT may goto */ 471 /* need to unlock here since NLA_PUT may goto */
508 read_unlock_bh(&skb->sk->sk_callback_lock); 472 read_unlock_bh(&skb->sk->sk_callback_lock);
509 NLA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); 473 NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
474 NLA_PUT_BE32(inst->skb, NFULA_GID, gid);
510 } else 475 } else
511 read_unlock_bh(&skb->sk->sk_callback_lock); 476 read_unlock_bh(&skb->sk->sk_callback_lock);
512 } 477 }
513 478
514 /* local sequence number */ 479 /* local sequence number */
515 if (inst->flags & NFULNL_CFG_F_SEQ) { 480 if (inst->flags & NFULNL_CFG_F_SEQ)
516 tmp_uint = htonl(inst->seq++); 481 NLA_PUT_BE32(inst->skb, NFULA_SEQ, htonl(inst->seq++));
517 NLA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); 482
518 }
519 /* global sequence number */ 483 /* global sequence number */
520 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) { 484 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
521 tmp_uint = htonl(atomic_inc_return(&global_seq)); 485 NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
522 NLA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); 486 htonl(atomic_inc_return(&global_seq)));
523 }
524 487
525 if (data_len) { 488 if (data_len) {
526 struct nlattr *nla; 489 struct nlattr *nla;
@@ -543,7 +506,6 @@ __build_packet_message(struct nfulnl_instance *inst,
543 return 0; 506 return 0;
544 507
545nlmsg_failure: 508nlmsg_failure:
546 UDEBUG("nlmsg_failure\n");
547nla_put_failure: 509nla_put_failure:
548 PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); 510 PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
549 return -1; 511 return -1;
@@ -604,12 +566,11 @@ nfulnl_log_packet(unsigned int pf,
604#endif 566#endif
605 + nla_total_size(sizeof(u_int32_t)) /* mark */ 567 + nla_total_size(sizeof(u_int32_t)) /* mark */
606 + nla_total_size(sizeof(u_int32_t)) /* uid */ 568 + nla_total_size(sizeof(u_int32_t)) /* uid */
569 + nla_total_size(sizeof(u_int32_t)) /* gid */
607 + nla_total_size(plen) /* prefix */ 570 + nla_total_size(plen) /* prefix */
608 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) 571 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
609 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); 572 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
610 573
611 UDEBUG("initial size=%u\n", size);
612
613 spin_lock_bh(&inst->lock); 574 spin_lock_bh(&inst->lock);
614 575
615 if (inst->flags & NFULNL_CFG_F_SEQ) 576 if (inst->flags & NFULNL_CFG_F_SEQ)
@@ -636,7 +597,6 @@ nfulnl_log_packet(unsigned int pf,
636 data_len = inst->copy_range; 597 data_len = inst->copy_range;
637 598
638 size += nla_total_size(data_len); 599 size += nla_total_size(data_len);
639 UDEBUG("copy_packet, therefore size now %u\n", size);
640 break; 600 break;
641 601
642 default: 602 default:
@@ -647,8 +607,6 @@ nfulnl_log_packet(unsigned int pf,
647 size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { 607 size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
648 /* either the queue len is too high or we don't have 608 /* either the queue len is too high or we don't have
649 * enough room in the skb left. flush to userspace. */ 609 * enough room in the skb left. flush to userspace. */
650 UDEBUG("flushing old skb\n");
651
652 __nfulnl_flush(inst); 610 __nfulnl_flush(inst);
653 } 611 }
654 612
@@ -658,7 +616,6 @@ nfulnl_log_packet(unsigned int pf,
658 goto alloc_failure; 616 goto alloc_failure;
659 } 617 }
660 618
661 UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
662 inst->qlen++; 619 inst->qlen++;
663 620
664 __build_packet_message(inst, skb, data_len, pf, 621 __build_packet_message(inst, skb, data_len, pf,
@@ -680,7 +637,6 @@ unlock_and_release:
680 return; 637 return;
681 638
682alloc_failure: 639alloc_failure:
683 UDEBUG("error allocating skb\n");
684 /* FIXME: statistics */ 640 /* FIXME: statistics */
685 goto unlock_and_release; 641 goto unlock_and_release;
686} 642}
@@ -703,7 +659,6 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
703 struct hlist_head *head = &instance_table[i]; 659 struct hlist_head *head = &instance_table[i];
704 660
705 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { 661 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
706 UDEBUG("node = %p\n", inst);
707 if ((n->net == &init_net) && 662 if ((n->net == &init_net) &&
708 (n->pid == inst->peer_pid)) 663 (n->pid == inst->peer_pid))
709 __instance_destroy(inst); 664 __instance_destroy(inst);
@@ -725,7 +680,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
725 return -ENOTSUPP; 680 return -ENOTSUPP;
726} 681}
727 682
728static struct nf_logger nfulnl_logger = { 683static const struct nf_logger nfulnl_logger = {
729 .name = "nfnetlink_log", 684 .name = "nfnetlink_log",
730 .logfn = &nfulnl_log_packet, 685 .logfn = &nfulnl_log_packet,
731 .me = THIS_MODULE, 686 .me = THIS_MODULE,
@@ -749,14 +704,17 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
749 struct nfulnl_instance *inst; 704 struct nfulnl_instance *inst;
750 int ret = 0; 705 int ret = 0;
751 706
752 UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
753
754 inst = instance_lookup_get(group_num); 707 inst = instance_lookup_get(group_num);
708 if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
709 ret = -EPERM;
710 goto out_put;
711 }
712
755 if (nfula[NFULA_CFG_CMD]) { 713 if (nfula[NFULA_CFG_CMD]) {
756 u_int8_t pf = nfmsg->nfgen_family; 714 u_int8_t pf = nfmsg->nfgen_family;
757 struct nfulnl_msg_config_cmd *cmd; 715 struct nfulnl_msg_config_cmd *cmd;
716
758 cmd = nla_data(nfula[NFULA_CFG_CMD]); 717 cmd = nla_data(nfula[NFULA_CFG_CMD]);
759 UDEBUG("found CFG_CMD for\n");
760 718
761 switch (cmd->command) { 719 switch (cmd->command) {
762 case NFULNL_CFG_CMD_BIND: 720 case NFULNL_CFG_CMD_BIND:
@@ -767,8 +725,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
767 725
768 inst = instance_create(group_num, 726 inst = instance_create(group_num,
769 NETLINK_CB(skb).pid); 727 NETLINK_CB(skb).pid);
770 if (!inst) { 728 if (IS_ERR(inst)) {
771 ret = -EINVAL; 729 ret = PTR_ERR(inst);
772 goto out; 730 goto out;
773 } 731 }
774 break; 732 break;
@@ -778,78 +736,71 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
778 goto out; 736 goto out;
779 } 737 }
780 738
781 if (inst->peer_pid != NETLINK_CB(skb).pid) {
782 ret = -EPERM;
783 goto out_put;
784 }
785
786 instance_destroy(inst); 739 instance_destroy(inst);
787 goto out; 740 goto out;
788 case NFULNL_CFG_CMD_PF_BIND: 741 case NFULNL_CFG_CMD_PF_BIND:
789 UDEBUG("registering log handler for pf=%u\n", pf);
790 ret = nf_log_register(pf, &nfulnl_logger); 742 ret = nf_log_register(pf, &nfulnl_logger);
791 break; 743 break;
792 case NFULNL_CFG_CMD_PF_UNBIND: 744 case NFULNL_CFG_CMD_PF_UNBIND:
793 UDEBUG("unregistering log handler for pf=%u\n", pf);
794 /* This is a bug and a feature. We cannot unregister 745 /* This is a bug and a feature. We cannot unregister
795 * other handlers, like nfnetlink_inst can */ 746 * other handlers, like nfnetlink_inst can */
796 nf_log_unregister_pf(pf); 747 nf_log_unregister_pf(pf);
797 break; 748 break;
798 default: 749 default:
799 ret = -EINVAL; 750 ret = -ENOTSUPP;
800 break; 751 break;
801 } 752 }
802
803 if (!inst)
804 goto out;
805 } else {
806 if (!inst) {
807 UDEBUG("no config command, and no instance for "
808 "group=%u pid=%u =>ENOENT\n",
809 group_num, NETLINK_CB(skb).pid);
810 ret = -ENOENT;
811 goto out;
812 }
813
814 if (inst->peer_pid != NETLINK_CB(skb).pid) {
815 UDEBUG("no config command, and wrong pid\n");
816 ret = -EPERM;
817 goto out_put;
818 }
819 } 753 }
820 754
821 if (nfula[NFULA_CFG_MODE]) { 755 if (nfula[NFULA_CFG_MODE]) {
822 struct nfulnl_msg_config_mode *params; 756 struct nfulnl_msg_config_mode *params;
823 params = nla_data(nfula[NFULA_CFG_MODE]); 757 params = nla_data(nfula[NFULA_CFG_MODE]);
824 758
759 if (!inst) {
760 ret = -ENODEV;
761 goto out;
762 }
825 nfulnl_set_mode(inst, params->copy_mode, 763 nfulnl_set_mode(inst, params->copy_mode,
826 ntohl(params->copy_range)); 764 ntohl(params->copy_range));
827 } 765 }
828 766
829 if (nfula[NFULA_CFG_TIMEOUT]) { 767 if (nfula[NFULA_CFG_TIMEOUT]) {
830 __be32 timeout = 768 __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
831 *(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]);
832 769
770 if (!inst) {
771 ret = -ENODEV;
772 goto out;
773 }
833 nfulnl_set_timeout(inst, ntohl(timeout)); 774 nfulnl_set_timeout(inst, ntohl(timeout));
834 } 775 }
835 776
836 if (nfula[NFULA_CFG_NLBUFSIZ]) { 777 if (nfula[NFULA_CFG_NLBUFSIZ]) {
837 __be32 nlbufsiz = 778 __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
838 *(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]);
839 779
780 if (!inst) {
781 ret = -ENODEV;
782 goto out;
783 }
840 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); 784 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
841 } 785 }
842 786
843 if (nfula[NFULA_CFG_QTHRESH]) { 787 if (nfula[NFULA_CFG_QTHRESH]) {
844 __be32 qthresh = 788 __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
845 *(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]);
846 789
790 if (!inst) {
791 ret = -ENODEV;
792 goto out;
793 }
847 nfulnl_set_qthresh(inst, ntohl(qthresh)); 794 nfulnl_set_qthresh(inst, ntohl(qthresh));
848 } 795 }
849 796
850 if (nfula[NFULA_CFG_FLAGS]) { 797 if (nfula[NFULA_CFG_FLAGS]) {
851 __be16 flags = 798 __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
852 *(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]); 799
800 if (!inst) {
801 ret = -ENODEV;
802 goto out;
803 }
853 nfulnl_set_flags(inst, ntohs(flags)); 804 nfulnl_set_flags(inst, ntohs(flags));
854 } 805 }
855 806
@@ -915,6 +866,7 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
915} 866}
916 867
917static void *seq_start(struct seq_file *seq, loff_t *pos) 868static void *seq_start(struct seq_file *seq, loff_t *pos)
869 __acquires(instances_lock)
918{ 870{
919 read_lock_bh(&instances_lock); 871 read_lock_bh(&instances_lock);
920 return get_idx(seq->private, *pos); 872 return get_idx(seq->private, *pos);
@@ -927,6 +879,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
927} 879}
928 880
929static void seq_stop(struct seq_file *s, void *v) 881static void seq_stop(struct seq_file *s, void *v)
882 __releases(instances_lock)
930{ 883{
931 read_unlock_bh(&instances_lock); 884 read_unlock_bh(&instances_lock);
932} 885}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ceeffcf6f9d..a48b20fe9cd6 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -3,6 +3,7 @@
3 * userspace via nfetlink. 3 * userspace via nfetlink.
4 * 4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org> 5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2007 by Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * Based on the old ipv4-only ip_queue.c: 8 * Based on the old ipv4-only ip_queue.c:
8 * (C) 2000-2002 James Morris <jmorris@intercode.com.au> 9 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
@@ -27,6 +28,7 @@
27#include <linux/netfilter/nfnetlink_queue.h> 28#include <linux/netfilter/nfnetlink_queue.h>
28#include <linux/list.h> 29#include <linux/list.h>
29#include <net/sock.h> 30#include <net/sock.h>
31#include <net/netfilter/nf_queue.h>
30 32
31#include <asm/atomic.h> 33#include <asm/atomic.h>
32 34
@@ -36,24 +38,9 @@
36 38
37#define NFQNL_QMAX_DEFAULT 1024 39#define NFQNL_QMAX_DEFAULT 1024
38 40
39#if 0
40#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \
41 __FILE__, __LINE__, __FUNCTION__, \
42 ## args)
43#else
44#define QDEBUG(x, ...)
45#endif
46
47struct nfqnl_queue_entry {
48 struct list_head list;
49 struct nf_info *info;
50 struct sk_buff *skb;
51 unsigned int id;
52};
53
54struct nfqnl_instance { 41struct nfqnl_instance {
55 struct hlist_node hlist; /* global list of queues */ 42 struct hlist_node hlist; /* global list of queues */
56 atomic_t use; 43 struct rcu_head rcu;
57 44
58 int peer_pid; 45 int peer_pid;
59 unsigned int queue_maxlen; 46 unsigned int queue_maxlen;
@@ -62,7 +49,7 @@ struct nfqnl_instance {
62 unsigned int queue_dropped; 49 unsigned int queue_dropped;
63 unsigned int queue_user_dropped; 50 unsigned int queue_user_dropped;
64 51
65 atomic_t id_sequence; /* 'sequence' of pkt ids */ 52 unsigned int id_sequence; /* 'sequence' of pkt ids */
66 53
67 u_int16_t queue_num; /* number of this queue */ 54 u_int16_t queue_num; /* number of this queue */
68 u_int8_t copy_mode; 55 u_int8_t copy_mode;
@@ -72,12 +59,12 @@ struct nfqnl_instance {
72 struct list_head queue_list; /* packets in queue */ 59 struct list_head queue_list; /* packets in queue */
73}; 60};
74 61
75typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); 62typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
76 63
77static DEFINE_RWLOCK(instances_lock); 64static DEFINE_SPINLOCK(instances_lock);
78 65
79#define INSTANCE_BUCKETS 16 66#define INSTANCE_BUCKETS 16
80static struct hlist_head instance_table[INSTANCE_BUCKETS]; 67static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
81 68
82static inline u_int8_t instance_hashfn(u_int16_t queue_num) 69static inline u_int8_t instance_hashfn(u_int16_t queue_num)
83{ 70{
@@ -85,14 +72,14 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
85} 72}
86 73
87static struct nfqnl_instance * 74static struct nfqnl_instance *
88__instance_lookup(u_int16_t queue_num) 75instance_lookup(u_int16_t queue_num)
89{ 76{
90 struct hlist_head *head; 77 struct hlist_head *head;
91 struct hlist_node *pos; 78 struct hlist_node *pos;
92 struct nfqnl_instance *inst; 79 struct nfqnl_instance *inst;
93 80
94 head = &instance_table[instance_hashfn(queue_num)]; 81 head = &instance_table[instance_hashfn(queue_num)];
95 hlist_for_each_entry(inst, pos, head, hlist) { 82 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
96 if (inst->queue_num == queue_num) 83 if (inst->queue_num == queue_num)
97 return inst; 84 return inst;
98 } 85 }
@@ -100,243 +87,131 @@ __instance_lookup(u_int16_t queue_num)
100} 87}
101 88
102static struct nfqnl_instance * 89static struct nfqnl_instance *
103instance_lookup_get(u_int16_t queue_num)
104{
105 struct nfqnl_instance *inst;
106
107 read_lock_bh(&instances_lock);
108 inst = __instance_lookup(queue_num);
109 if (inst)
110 atomic_inc(&inst->use);
111 read_unlock_bh(&instances_lock);
112
113 return inst;
114}
115
116static void
117instance_put(struct nfqnl_instance *inst)
118{
119 if (inst && atomic_dec_and_test(&inst->use)) {
120 QDEBUG("kfree(inst=%p)\n", inst);
121 kfree(inst);
122 }
123}
124
125static struct nfqnl_instance *
126instance_create(u_int16_t queue_num, int pid) 90instance_create(u_int16_t queue_num, int pid)
127{ 91{
128 struct nfqnl_instance *inst; 92 struct nfqnl_instance *inst;
93 unsigned int h;
94 int err;
129 95
130 QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); 96 spin_lock(&instances_lock);
131 97 if (instance_lookup(queue_num)) {
132 write_lock_bh(&instances_lock); 98 err = -EEXIST;
133 if (__instance_lookup(queue_num)) {
134 inst = NULL;
135 QDEBUG("aborting, instance already exists\n");
136 goto out_unlock; 99 goto out_unlock;
137 } 100 }
138 101
139 inst = kzalloc(sizeof(*inst), GFP_ATOMIC); 102 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
140 if (!inst) 103 if (!inst) {
104 err = -ENOMEM;
141 goto out_unlock; 105 goto out_unlock;
106 }
142 107
143 inst->queue_num = queue_num; 108 inst->queue_num = queue_num;
144 inst->peer_pid = pid; 109 inst->peer_pid = pid;
145 inst->queue_maxlen = NFQNL_QMAX_DEFAULT; 110 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
146 inst->copy_range = 0xfffff; 111 inst->copy_range = 0xfffff;
147 inst->copy_mode = NFQNL_COPY_NONE; 112 inst->copy_mode = NFQNL_COPY_NONE;
148 atomic_set(&inst->id_sequence, 0);
149 /* needs to be two, since we _put() after creation */
150 atomic_set(&inst->use, 2);
151 spin_lock_init(&inst->lock); 113 spin_lock_init(&inst->lock);
152 INIT_LIST_HEAD(&inst->queue_list); 114 INIT_LIST_HEAD(&inst->queue_list);
115 INIT_RCU_HEAD(&inst->rcu);
153 116
154 if (!try_module_get(THIS_MODULE)) 117 if (!try_module_get(THIS_MODULE)) {
118 err = -EAGAIN;
155 goto out_free; 119 goto out_free;
120 }
156 121
157 hlist_add_head(&inst->hlist, 122 h = instance_hashfn(queue_num);
158 &instance_table[instance_hashfn(queue_num)]); 123 hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
159
160 write_unlock_bh(&instances_lock);
161 124
162 QDEBUG("successfully created new instance\n"); 125 spin_unlock(&instances_lock);
163 126
164 return inst; 127 return inst;
165 128
166out_free: 129out_free:
167 kfree(inst); 130 kfree(inst);
168out_unlock: 131out_unlock:
169 write_unlock_bh(&instances_lock); 132 spin_unlock(&instances_lock);
170 return NULL; 133 return ERR_PTR(err);
171} 134}
172 135
173static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); 136static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
137 unsigned long data);
174 138
175static void 139static void
176_instance_destroy2(struct nfqnl_instance *inst, int lock) 140instance_destroy_rcu(struct rcu_head *head)
177{ 141{
178 /* first pull it out of the global list */ 142 struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
179 if (lock) 143 rcu);
180 write_lock_bh(&instances_lock);
181
182 QDEBUG("removing instance %p (queuenum=%u) from hash\n",
183 inst, inst->queue_num);
184 hlist_del(&inst->hlist);
185
186 if (lock)
187 write_unlock_bh(&instances_lock);
188
189 /* then flush all pending skbs from the queue */
190 nfqnl_flush(inst, NF_DROP);
191
192 /* and finally put the refcount */
193 instance_put(inst);
194 144
145 nfqnl_flush(inst, NULL, 0);
146 kfree(inst);
195 module_put(THIS_MODULE); 147 module_put(THIS_MODULE);
196} 148}
197 149
198static inline void 150static void
199__instance_destroy(struct nfqnl_instance *inst) 151__instance_destroy(struct nfqnl_instance *inst)
200{ 152{
201 _instance_destroy2(inst, 0); 153 hlist_del_rcu(&inst->hlist);
154 call_rcu(&inst->rcu, instance_destroy_rcu);
202} 155}
203 156
204static inline void
205instance_destroy(struct nfqnl_instance *inst)
206{
207 _instance_destroy2(inst, 1);
208}
209
210
211
212static void 157static void
213issue_verdict(struct nfqnl_queue_entry *entry, int verdict) 158instance_destroy(struct nfqnl_instance *inst)
214{ 159{
215 QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); 160 spin_lock(&instances_lock);
216 161 __instance_destroy(inst);
217 /* TCP input path (and probably other bits) assume to be called 162 spin_unlock(&instances_lock);
218 * from softirq context, not from syscall, like issue_verdict is
219 * called. TCP input path deadlocks with locks taken from timer
220 * softirq, e.g. We therefore emulate this by local_bh_disable() */
221
222 local_bh_disable();
223 nf_reinject(entry->skb, entry->info, verdict);
224 local_bh_enable();
225
226 kfree(entry);
227} 163}
228 164
229static inline void 165static inline void
230__enqueue_entry(struct nfqnl_instance *queue, 166__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
231 struct nfqnl_queue_entry *entry)
232{ 167{
233 list_add(&entry->list, &queue->queue_list); 168 list_add_tail(&entry->list, &queue->queue_list);
234 queue->queue_total++; 169 queue->queue_total++;
235} 170}
236 171
237/* 172static struct nf_queue_entry *
238 * Find and return a queued entry matched by cmpfn, or return the last 173find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
239 * entry if cmpfn is NULL.
240 */
241static inline struct nfqnl_queue_entry *
242__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
243 unsigned long data)
244{ 174{
245 struct list_head *p; 175 struct nf_queue_entry *entry = NULL, *i;
246 176
247 list_for_each_prev(p, &queue->queue_list) { 177 spin_lock_bh(&queue->lock);
248 struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p;
249 178
250 if (!cmpfn || cmpfn(entry, data)) 179 list_for_each_entry(i, &queue->queue_list, list) {
251 return entry; 180 if (i->id == id) {
181 entry = i;
182 break;
183 }
252 } 184 }
253 return NULL;
254}
255
256static inline void
257__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry)
258{
259 list_del(&entry->list);
260 q->queue_total--;
261}
262
263static inline struct nfqnl_queue_entry *
264__find_dequeue_entry(struct nfqnl_instance *queue,
265 nfqnl_cmpfn cmpfn, unsigned long data)
266{
267 struct nfqnl_queue_entry *entry;
268
269 entry = __find_entry(queue, cmpfn, data);
270 if (entry == NULL)
271 return NULL;
272
273 __dequeue_entry(queue, entry);
274 return entry;
275}
276
277
278static inline void
279__nfqnl_flush(struct nfqnl_instance *queue, int verdict)
280{
281 struct nfqnl_queue_entry *entry;
282
283 while ((entry = __find_dequeue_entry(queue, NULL, 0)))
284 issue_verdict(entry, verdict);
285}
286
287static inline int
288__nfqnl_set_mode(struct nfqnl_instance *queue,
289 unsigned char mode, unsigned int range)
290{
291 int status = 0;
292
293 switch (mode) {
294 case NFQNL_COPY_NONE:
295 case NFQNL_COPY_META:
296 queue->copy_mode = mode;
297 queue->copy_range = 0;
298 break;
299
300 case NFQNL_COPY_PACKET:
301 queue->copy_mode = mode;
302 /* we're using struct nlattr which has 16bit nla_len */
303 if (range > 0xffff)
304 queue->copy_range = 0xffff;
305 else
306 queue->copy_range = range;
307 break;
308
309 default:
310 status = -EINVAL;
311 185
186 if (entry) {
187 list_del(&entry->list);
188 queue->queue_total--;
312 } 189 }
313 return status;
314}
315 190
316static struct nfqnl_queue_entry *
317find_dequeue_entry(struct nfqnl_instance *queue,
318 nfqnl_cmpfn cmpfn, unsigned long data)
319{
320 struct nfqnl_queue_entry *entry;
321
322 spin_lock_bh(&queue->lock);
323 entry = __find_dequeue_entry(queue, cmpfn, data);
324 spin_unlock_bh(&queue->lock); 191 spin_unlock_bh(&queue->lock);
325 192
326 return entry; 193 return entry;
327} 194}
328 195
329static void 196static void
330nfqnl_flush(struct nfqnl_instance *queue, int verdict) 197nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
331{ 198{
199 struct nf_queue_entry *entry, *next;
200
332 spin_lock_bh(&queue->lock); 201 spin_lock_bh(&queue->lock);
333 __nfqnl_flush(queue, verdict); 202 list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
203 if (!cmpfn || cmpfn(entry, data)) {
204 list_del(&entry->list);
205 queue->queue_total--;
206 nf_reinject(entry, NF_DROP);
207 }
208 }
334 spin_unlock_bh(&queue->lock); 209 spin_unlock_bh(&queue->lock);
335} 210}
336 211
337static struct sk_buff * 212static struct sk_buff *
338nfqnl_build_packet_message(struct nfqnl_instance *queue, 213nfqnl_build_packet_message(struct nfqnl_instance *queue,
339 struct nfqnl_queue_entry *entry, int *errp) 214 struct nf_queue_entry *entry)
340{ 215{
341 sk_buff_data_t old_tail; 216 sk_buff_data_t old_tail;
342 size_t size; 217 size_t size;
@@ -345,13 +220,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
345 struct nfqnl_msg_packet_hdr pmsg; 220 struct nfqnl_msg_packet_hdr pmsg;
346 struct nlmsghdr *nlh; 221 struct nlmsghdr *nlh;
347 struct nfgenmsg *nfmsg; 222 struct nfgenmsg *nfmsg;
348 struct nf_info *entinf = entry->info;
349 struct sk_buff *entskb = entry->skb; 223 struct sk_buff *entskb = entry->skb;
350 struct net_device *indev; 224 struct net_device *indev;
351 struct net_device *outdev; 225 struct net_device *outdev;
352 __be32 tmp_uint;
353
354 QDEBUG("entered\n");
355 226
356 size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) 227 size = NLMSG_ALIGN(sizeof(struct nfgenmsg))
357 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) 228 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -365,11 +236,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
365 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) 236 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
366 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); 237 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
367 238
368 outdev = entinf->outdev; 239 outdev = entry->outdev;
369 240
370 spin_lock_bh(&queue->lock); 241 spin_lock_bh(&queue->lock);
371 242
372 switch (queue->copy_mode) { 243 switch ((enum nfqnl_config_mode)queue->copy_mode) {
373 case NFQNL_COPY_META: 244 case NFQNL_COPY_META:
374 case NFQNL_COPY_NONE: 245 case NFQNL_COPY_NONE:
375 data_len = 0; 246 data_len = 0;
@@ -378,7 +249,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
378 case NFQNL_COPY_PACKET: 249 case NFQNL_COPY_PACKET:
379 if ((entskb->ip_summed == CHECKSUM_PARTIAL || 250 if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
380 entskb->ip_summed == CHECKSUM_COMPLETE) && 251 entskb->ip_summed == CHECKSUM_COMPLETE) &&
381 (*errp = skb_checksum_help(entskb))) { 252 skb_checksum_help(entskb)) {
382 spin_unlock_bh(&queue->lock); 253 spin_unlock_bh(&queue->lock);
383 return NULL; 254 return NULL;
384 } 255 }
@@ -390,13 +261,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
390 261
391 size += nla_total_size(data_len); 262 size += nla_total_size(data_len);
392 break; 263 break;
393
394 default:
395 *errp = -EINVAL;
396 spin_unlock_bh(&queue->lock);
397 return NULL;
398 } 264 }
399 265
266 entry->id = queue->id_sequence++;
267
400 spin_unlock_bh(&queue->lock); 268 spin_unlock_bh(&queue->lock);
401 269
402 skb = alloc_skb(size, GFP_ATOMIC); 270 skb = alloc_skb(size, GFP_ATOMIC);
@@ -408,81 +276,69 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
408 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 276 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
409 sizeof(struct nfgenmsg)); 277 sizeof(struct nfgenmsg));
410 nfmsg = NLMSG_DATA(nlh); 278 nfmsg = NLMSG_DATA(nlh);
411 nfmsg->nfgen_family = entinf->pf; 279 nfmsg->nfgen_family = entry->pf;
412 nfmsg->version = NFNETLINK_V0; 280 nfmsg->version = NFNETLINK_V0;
413 nfmsg->res_id = htons(queue->queue_num); 281 nfmsg->res_id = htons(queue->queue_num);
414 282
415 pmsg.packet_id = htonl(entry->id); 283 pmsg.packet_id = htonl(entry->id);
416 pmsg.hw_protocol = entskb->protocol; 284 pmsg.hw_protocol = entskb->protocol;
417 pmsg.hook = entinf->hook; 285 pmsg.hook = entry->hook;
418 286
419 NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); 287 NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
420 288
421 indev = entinf->indev; 289 indev = entry->indev;
422 if (indev) { 290 if (indev) {
423 tmp_uint = htonl(indev->ifindex);
424#ifndef CONFIG_BRIDGE_NETFILTER 291#ifndef CONFIG_BRIDGE_NETFILTER
425 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); 292 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
426#else 293#else
427 if (entinf->pf == PF_BRIDGE) { 294 if (entry->pf == PF_BRIDGE) {
428 /* Case 1: indev is physical input device, we need to 295 /* Case 1: indev is physical input device, we need to
429 * look for bridge group (when called from 296 * look for bridge group (when called from
430 * netfilter_bridge) */ 297 * netfilter_bridge) */
431 NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), 298 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
432 &tmp_uint); 299 htonl(indev->ifindex));
433 /* this is the bridge group "brX" */ 300 /* this is the bridge group "brX" */
434 tmp_uint = htonl(indev->br_port->br->dev->ifindex); 301 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
435 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 302 htonl(indev->br_port->br->dev->ifindex));
436 &tmp_uint);
437 } else { 303 } else {
438 /* Case 2: indev is bridge group, we need to look for 304 /* Case 2: indev is bridge group, we need to look for
439 * physical device (when called from ipv4) */ 305 * physical device (when called from ipv4) */
440 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 306 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
441 &tmp_uint); 307 htonl(indev->ifindex));
442 if (entskb->nf_bridge 308 if (entskb->nf_bridge && entskb->nf_bridge->physindev)
443 && entskb->nf_bridge->physindev) { 309 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
444 tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); 310 htonl(entskb->nf_bridge->physindev->ifindex));
445 NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
446 sizeof(tmp_uint), &tmp_uint);
447 }
448 } 311 }
449#endif 312#endif
450 } 313 }
451 314
452 if (outdev) { 315 if (outdev) {
453 tmp_uint = htonl(outdev->ifindex);
454#ifndef CONFIG_BRIDGE_NETFILTER 316#ifndef CONFIG_BRIDGE_NETFILTER
455 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); 317 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
456#else 318#else
457 if (entinf->pf == PF_BRIDGE) { 319 if (entry->pf == PF_BRIDGE) {
458 /* Case 1: outdev is physical output device, we need to 320 /* Case 1: outdev is physical output device, we need to
459 * look for bridge group (when called from 321 * look for bridge group (when called from
460 * netfilter_bridge) */ 322 * netfilter_bridge) */
461 NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), 323 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
462 &tmp_uint); 324 htonl(outdev->ifindex));
463 /* this is the bridge group "brX" */ 325 /* this is the bridge group "brX" */
464 tmp_uint = htonl(outdev->br_port->br->dev->ifindex); 326 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
465 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 327 htonl(outdev->br_port->br->dev->ifindex));
466 &tmp_uint);
467 } else { 328 } else {
468 /* Case 2: outdev is bridge group, we need to look for 329 /* Case 2: outdev is bridge group, we need to look for
469 * physical output device (when called from ipv4) */ 330 * physical output device (when called from ipv4) */
470 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 331 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
471 &tmp_uint); 332 htonl(outdev->ifindex));
472 if (entskb->nf_bridge 333 if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
473 && entskb->nf_bridge->physoutdev) { 334 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
474 tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); 335 htonl(entskb->nf_bridge->physoutdev->ifindex));
475 NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
476 sizeof(tmp_uint), &tmp_uint);
477 }
478 } 336 }
479#endif 337#endif
480 } 338 }
481 339
482 if (entskb->mark) { 340 if (entskb->mark)
483 tmp_uint = htonl(entskb->mark); 341 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
484 NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
485 }
486 342
487 if (indev && entskb->dev) { 343 if (indev && entskb->dev) {
488 struct nfqnl_msg_packet_hw phw; 344 struct nfqnl_msg_packet_hw phw;
@@ -504,7 +360,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
504 360
505 if (data_len) { 361 if (data_len) {
506 struct nlattr *nla; 362 struct nlattr *nla;
507 int size = nla_attr_size(data_len); 363 int sz = nla_attr_size(data_len);
508 364
509 if (skb_tailroom(skb) < nla_total_size(data_len)) { 365 if (skb_tailroom(skb) < nla_total_size(data_len)) {
510 printk(KERN_WARNING "nf_queue: no tailroom!\n"); 366 printk(KERN_WARNING "nf_queue: no tailroom!\n");
@@ -513,7 +369,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
513 369
514 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); 370 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
515 nla->nla_type = NFQA_PAYLOAD; 371 nla->nla_type = NFQA_PAYLOAD;
516 nla->nla_len = size; 372 nla->nla_len = sz;
517 373
518 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) 374 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
519 BUG(); 375 BUG();
@@ -526,51 +382,29 @@ nlmsg_failure:
526nla_put_failure: 382nla_put_failure:
527 if (skb) 383 if (skb)
528 kfree_skb(skb); 384 kfree_skb(skb);
529 *errp = -EINVAL;
530 if (net_ratelimit()) 385 if (net_ratelimit())
531 printk(KERN_ERR "nf_queue: error creating packet message\n"); 386 printk(KERN_ERR "nf_queue: error creating packet message\n");
532 return NULL; 387 return NULL;
533} 388}
534 389
535static int 390static int
536nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 391nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
537 unsigned int queuenum, void *data)
538{ 392{
539 int status = -EINVAL;
540 struct sk_buff *nskb; 393 struct sk_buff *nskb;
541 struct nfqnl_instance *queue; 394 struct nfqnl_instance *queue;
542 struct nfqnl_queue_entry *entry; 395 int err;
543
544 QDEBUG("entered\n");
545
546 queue = instance_lookup_get(queuenum);
547 if (!queue) {
548 QDEBUG("no queue instance matching\n");
549 return -EINVAL;
550 }
551
552 if (queue->copy_mode == NFQNL_COPY_NONE) {
553 QDEBUG("mode COPY_NONE, aborting\n");
554 status = -EAGAIN;
555 goto err_out_put;
556 }
557 396
558 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 397 /* rcu_read_lock()ed by nf_hook_slow() */
559 if (entry == NULL) { 398 queue = instance_lookup(queuenum);
560 if (net_ratelimit()) 399 if (!queue)
561 printk(KERN_ERR 400 goto err_out;
562 "nf_queue: OOM in nfqnl_enqueue_packet()\n");
563 status = -ENOMEM;
564 goto err_out_put;
565 }
566 401
567 entry->info = info; 402 if (queue->copy_mode == NFQNL_COPY_NONE)
568 entry->skb = skb; 403 goto err_out;
569 entry->id = atomic_inc_return(&queue->id_sequence);
570 404
571 nskb = nfqnl_build_packet_message(queue, entry, &status); 405 nskb = nfqnl_build_packet_message(queue, entry);
572 if (nskb == NULL) 406 if (nskb == NULL)
573 goto err_out_free; 407 goto err_out;
574 408
575 spin_lock_bh(&queue->lock); 409 spin_lock_bh(&queue->lock);
576 410
@@ -579,7 +413,6 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
579 413
580 if (queue->queue_total >= queue->queue_maxlen) { 414 if (queue->queue_total >= queue->queue_maxlen) {
581 queue->queue_dropped++; 415 queue->queue_dropped++;
582 status = -ENOSPC;
583 if (net_ratelimit()) 416 if (net_ratelimit())
584 printk(KERN_WARNING "nf_queue: full at %d entries, " 417 printk(KERN_WARNING "nf_queue: full at %d entries, "
585 "dropping packets(s). Dropped: %d\n", 418 "dropping packets(s). Dropped: %d\n",
@@ -588,8 +421,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
588 } 421 }
589 422
590 /* nfnetlink_unicast will either free the nskb or add it to a socket */ 423 /* nfnetlink_unicast will either free the nskb or add it to a socket */
591 status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); 424 err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
592 if (status < 0) { 425 if (err < 0) {
593 queue->queue_user_dropped++; 426 queue->queue_user_dropped++;
594 goto err_out_unlock; 427 goto err_out_unlock;
595 } 428 }
@@ -597,24 +430,18 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
597 __enqueue_entry(queue, entry); 430 __enqueue_entry(queue, entry);
598 431
599 spin_unlock_bh(&queue->lock); 432 spin_unlock_bh(&queue->lock);
600 instance_put(queue); 433 return 0;
601 return status;
602 434
603err_out_free_nskb: 435err_out_free_nskb:
604 kfree_skb(nskb); 436 kfree_skb(nskb);
605
606err_out_unlock: 437err_out_unlock:
607 spin_unlock_bh(&queue->lock); 438 spin_unlock_bh(&queue->lock);
608 439err_out:
609err_out_free: 440 return -1;
610 kfree(entry);
611err_out_put:
612 instance_put(queue);
613 return status;
614} 441}
615 442
616static int 443static int
617nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) 444nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
618{ 445{
619 int diff; 446 int diff;
620 int err; 447 int err;
@@ -645,35 +472,46 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
645 return 0; 472 return 0;
646} 473}
647 474
648static inline int
649id_cmp(struct nfqnl_queue_entry *e, unsigned long id)
650{
651 return (id == e->id);
652}
653
654static int 475static int
655nfqnl_set_mode(struct nfqnl_instance *queue, 476nfqnl_set_mode(struct nfqnl_instance *queue,
656 unsigned char mode, unsigned int range) 477 unsigned char mode, unsigned int range)
657{ 478{
658 int status; 479 int status = 0;
659 480
660 spin_lock_bh(&queue->lock); 481 spin_lock_bh(&queue->lock);
661 status = __nfqnl_set_mode(queue, mode, range); 482 switch (mode) {
483 case NFQNL_COPY_NONE:
484 case NFQNL_COPY_META:
485 queue->copy_mode = mode;
486 queue->copy_range = 0;
487 break;
488
489 case NFQNL_COPY_PACKET:
490 queue->copy_mode = mode;
491 /* we're using struct nlattr which has 16bit nla_len */
492 if (range > 0xffff)
493 queue->copy_range = 0xffff;
494 else
495 queue->copy_range = range;
496 break;
497
498 default:
499 status = -EINVAL;
500
501 }
662 spin_unlock_bh(&queue->lock); 502 spin_unlock_bh(&queue->lock);
663 503
664 return status; 504 return status;
665} 505}
666 506
667static int 507static int
668dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) 508dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
669{ 509{
670 struct nf_info *entinf = entry->info; 510 if (entry->indev)
671 511 if (entry->indev->ifindex == ifindex)
672 if (entinf->indev)
673 if (entinf->indev->ifindex == ifindex)
674 return 1; 512 return 1;
675 if (entinf->outdev) 513 if (entry->outdev)
676 if (entinf->outdev->ifindex == ifindex) 514 if (entry->outdev->ifindex == ifindex)
677 return 1; 515 return 1;
678#ifdef CONFIG_BRIDGE_NETFILTER 516#ifdef CONFIG_BRIDGE_NETFILTER
679 if (entry->skb->nf_bridge) { 517 if (entry->skb->nf_bridge) {
@@ -695,27 +533,18 @@ nfqnl_dev_drop(int ifindex)
695{ 533{
696 int i; 534 int i;
697 535
698 QDEBUG("entering for ifindex %u\n", ifindex); 536 rcu_read_lock();
699
700 /* this only looks like we have to hold the readlock for a way too long
701 * time, issue_verdict(), nf_reinject(), ... - but we always only
702 * issue NF_DROP, which is processed directly in nf_reinject() */
703 read_lock_bh(&instances_lock);
704 537
705 for (i = 0; i < INSTANCE_BUCKETS; i++) { 538 for (i = 0; i < INSTANCE_BUCKETS; i++) {
706 struct hlist_node *tmp; 539 struct hlist_node *tmp;
707 struct nfqnl_instance *inst; 540 struct nfqnl_instance *inst;
708 struct hlist_head *head = &instance_table[i]; 541 struct hlist_head *head = &instance_table[i];
709 542
710 hlist_for_each_entry(inst, tmp, head, hlist) { 543 hlist_for_each_entry_rcu(inst, tmp, head, hlist)
711 struct nfqnl_queue_entry *entry; 544 nfqnl_flush(inst, dev_cmp, ifindex);
712 while ((entry = find_dequeue_entry(inst, dev_cmp,
713 ifindex)) != NULL)
714 issue_verdict(entry, NF_DROP);
715 }
716 } 545 }
717 546
718 read_unlock_bh(&instances_lock); 547 rcu_read_unlock();
719} 548}
720 549
721#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 550#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -750,8 +579,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
750 int i; 579 int i;
751 580
752 /* destroy all instances for this pid */ 581 /* destroy all instances for this pid */
753 write_lock_bh(&instances_lock); 582 spin_lock(&instances_lock);
754 for (i = 0; i < INSTANCE_BUCKETS; i++) { 583 for (i = 0; i < INSTANCE_BUCKETS; i++) {
755 struct hlist_node *tmp, *t2; 584 struct hlist_node *tmp, *t2;
756 struct nfqnl_instance *inst; 585 struct nfqnl_instance *inst;
757 struct hlist_head *head = &instance_table[i]; 586 struct hlist_head *head = &instance_table[i];
@@ -762,7 +591,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
762 __instance_destroy(inst); 591 __instance_destroy(inst);
763 } 592 }
764 } 593 }
765 write_unlock_bh(&instances_lock); 594 spin_unlock(&instances_lock);
766 } 595 }
767 return NOTIFY_DONE; 596 return NOTIFY_DONE;
768} 597}
@@ -787,21 +616,24 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
787 struct nfqnl_msg_verdict_hdr *vhdr; 616 struct nfqnl_msg_verdict_hdr *vhdr;
788 struct nfqnl_instance *queue; 617 struct nfqnl_instance *queue;
789 unsigned int verdict; 618 unsigned int verdict;
790 struct nfqnl_queue_entry *entry; 619 struct nf_queue_entry *entry;
791 int err; 620 int err;
792 621
793 queue = instance_lookup_get(queue_num); 622 rcu_read_lock();
794 if (!queue) 623 queue = instance_lookup(queue_num);
795 return -ENODEV; 624 if (!queue) {
625 err = -ENODEV;
626 goto err_out_unlock;
627 }
796 628
797 if (queue->peer_pid != NETLINK_CB(skb).pid) { 629 if (queue->peer_pid != NETLINK_CB(skb).pid) {
798 err = -EPERM; 630 err = -EPERM;
799 goto err_out_put; 631 goto err_out_unlock;
800 } 632 }
801 633
802 if (!nfqa[NFQA_VERDICT_HDR]) { 634 if (!nfqa[NFQA_VERDICT_HDR]) {
803 err = -EINVAL; 635 err = -EINVAL;
804 goto err_out_put; 636 goto err_out_unlock;
805 } 637 }
806 638
807 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); 639 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
@@ -809,14 +641,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
809 641
810 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { 642 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
811 err = -EINVAL; 643 err = -EINVAL;
812 goto err_out_put; 644 goto err_out_unlock;
813 } 645 }
814 646
815 entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); 647 entry = find_dequeue_entry(queue, ntohl(vhdr->id));
816 if (entry == NULL) { 648 if (entry == NULL) {
817 err = -ENOENT; 649 err = -ENOENT;
818 goto err_out_put; 650 goto err_out_unlock;
819 } 651 }
652 rcu_read_unlock();
820 653
821 if (nfqa[NFQA_PAYLOAD]) { 654 if (nfqa[NFQA_PAYLOAD]) {
822 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), 655 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
@@ -825,15 +658,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
825 } 658 }
826 659
827 if (nfqa[NFQA_MARK]) 660 if (nfqa[NFQA_MARK])
828 entry->skb->mark = ntohl(*(__be32 *) 661 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
829 nla_data(nfqa[NFQA_MARK]));
830 662
831 issue_verdict(entry, verdict); 663 nf_reinject(entry, verdict);
832 instance_put(queue);
833 return 0; 664 return 0;
834 665
835err_out_put: 666err_out_unlock:
836 instance_put(queue); 667 rcu_read_unlock();
837 return err; 668 return err;
838} 669}
839 670
@@ -849,7 +680,7 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
849 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, 680 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
850}; 681};
851 682
852static struct nf_queue_handler nfqh = { 683static const struct nf_queue_handler nfqh = {
853 .name = "nf_queue", 684 .name = "nf_queue",
854 .outfn = &nfqnl_enqueue_packet, 685 .outfn = &nfqnl_enqueue_packet,
855}; 686};
@@ -861,70 +692,72 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
861 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 692 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
862 u_int16_t queue_num = ntohs(nfmsg->res_id); 693 u_int16_t queue_num = ntohs(nfmsg->res_id);
863 struct nfqnl_instance *queue; 694 struct nfqnl_instance *queue;
695 struct nfqnl_msg_config_cmd *cmd = NULL;
864 int ret = 0; 696 int ret = 0;
865 697
866 QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
867
868 queue = instance_lookup_get(queue_num);
869 if (nfqa[NFQA_CFG_CMD]) { 698 if (nfqa[NFQA_CFG_CMD]) {
870 struct nfqnl_msg_config_cmd *cmd;
871 cmd = nla_data(nfqa[NFQA_CFG_CMD]); 699 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
872 QDEBUG("found CFG_CMD\n");
873 700
701 /* Commands without queue context - might sleep */
874 switch (cmd->command) { 702 switch (cmd->command) {
875 case NFQNL_CFG_CMD_BIND: 703 case NFQNL_CFG_CMD_PF_BIND:
876 if (queue) 704 ret = nf_register_queue_handler(ntohs(cmd->pf),
877 return -EBUSY; 705 &nfqh);
706 break;
707 case NFQNL_CFG_CMD_PF_UNBIND:
708 ret = nf_unregister_queue_handler(ntohs(cmd->pf),
709 &nfqh);
710 break;
711 default:
712 break;
713 }
714
715 if (ret < 0)
716 return ret;
717 }
718
719 rcu_read_lock();
720 queue = instance_lookup(queue_num);
721 if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
722 ret = -EPERM;
723 goto err_out_unlock;
724 }
878 725
726 if (cmd != NULL) {
727 switch (cmd->command) {
728 case NFQNL_CFG_CMD_BIND:
729 if (queue) {
730 ret = -EBUSY;
731 goto err_out_unlock;
732 }
879 queue = instance_create(queue_num, NETLINK_CB(skb).pid); 733 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
880 if (!queue) 734 if (IS_ERR(queue)) {
881 return -EINVAL; 735 ret = PTR_ERR(queue);
736 goto err_out_unlock;
737 }
882 break; 738 break;
883 case NFQNL_CFG_CMD_UNBIND: 739 case NFQNL_CFG_CMD_UNBIND:
884 if (!queue) 740 if (!queue) {
885 return -ENODEV; 741 ret = -ENODEV;
886 742 goto err_out_unlock;
887 if (queue->peer_pid != NETLINK_CB(skb).pid) {
888 ret = -EPERM;
889 goto out_put;
890 } 743 }
891
892 instance_destroy(queue); 744 instance_destroy(queue);
893 break; 745 break;
894 case NFQNL_CFG_CMD_PF_BIND: 746 case NFQNL_CFG_CMD_PF_BIND:
895 QDEBUG("registering queue handler for pf=%u\n",
896 ntohs(cmd->pf));
897 ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
898 break;
899 case NFQNL_CFG_CMD_PF_UNBIND: 747 case NFQNL_CFG_CMD_PF_UNBIND:
900 QDEBUG("unregistering queue handler for pf=%u\n",
901 ntohs(cmd->pf));
902 ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
903 break; 748 break;
904 default: 749 default:
905 ret = -EINVAL; 750 ret = -ENOTSUPP;
906 break; 751 break;
907 } 752 }
908 } else {
909 if (!queue) {
910 QDEBUG("no config command, and no instance ENOENT\n");
911 ret = -ENOENT;
912 goto out_put;
913 }
914
915 if (queue->peer_pid != NETLINK_CB(skb).pid) {
916 QDEBUG("no config command, and wrong pid\n");
917 ret = -EPERM;
918 goto out_put;
919 }
920 } 753 }
921 754
922 if (nfqa[NFQA_CFG_PARAMS]) { 755 if (nfqa[NFQA_CFG_PARAMS]) {
923 struct nfqnl_msg_config_params *params; 756 struct nfqnl_msg_config_params *params;
924 757
925 if (!queue) { 758 if (!queue) {
926 ret = -ENOENT; 759 ret = -ENODEV;
927 goto out_put; 760 goto err_out_unlock;
928 } 761 }
929 params = nla_data(nfqa[NFQA_CFG_PARAMS]); 762 params = nla_data(nfqa[NFQA_CFG_PARAMS]);
930 nfqnl_set_mode(queue, params->copy_mode, 763 nfqnl_set_mode(queue, params->copy_mode,
@@ -933,14 +766,19 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
933 766
934 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { 767 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
935 __be32 *queue_maxlen; 768 __be32 *queue_maxlen;
769
770 if (!queue) {
771 ret = -ENODEV;
772 goto err_out_unlock;
773 }
936 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); 774 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
937 spin_lock_bh(&queue->lock); 775 spin_lock_bh(&queue->lock);
938 queue->queue_maxlen = ntohl(*queue_maxlen); 776 queue->queue_maxlen = ntohl(*queue_maxlen);
939 spin_unlock_bh(&queue->lock); 777 spin_unlock_bh(&queue->lock);
940 } 778 }
941 779
942out_put: 780err_out_unlock:
943 instance_put(queue); 781 rcu_read_unlock();
944 return ret; 782 return ret;
945} 783}
946 784
@@ -1007,8 +845,9 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
1007} 845}
1008 846
1009static void *seq_start(struct seq_file *seq, loff_t *pos) 847static void *seq_start(struct seq_file *seq, loff_t *pos)
848 __acquires(instances_lock)
1010{ 849{
1011 read_lock_bh(&instances_lock); 850 spin_lock(&instances_lock);
1012 return get_idx(seq, *pos); 851 return get_idx(seq, *pos);
1013} 852}
1014 853
@@ -1019,8 +858,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1019} 858}
1020 859
1021static void seq_stop(struct seq_file *s, void *v) 860static void seq_stop(struct seq_file *s, void *v)
861 __releases(instances_lock)
1022{ 862{
1023 read_unlock_bh(&instances_lock); 863 spin_unlock(&instances_lock);
1024} 864}
1025 865
1026static int seq_show(struct seq_file *s, void *v) 866static int seq_show(struct seq_file *s, void *v)
@@ -1032,8 +872,7 @@ static int seq_show(struct seq_file *s, void *v)
1032 inst->peer_pid, inst->queue_total, 872 inst->peer_pid, inst->queue_total,
1033 inst->copy_mode, inst->copy_range, 873 inst->copy_mode, inst->copy_range,
1034 inst->queue_dropped, inst->queue_user_dropped, 874 inst->queue_dropped, inst->queue_user_dropped,
1035 atomic_read(&inst->id_sequence), 875 inst->id_sequence, 1);
1036 atomic_read(&inst->use));
1037} 876}
1038 877
1039static const struct seq_operations nfqnl_seq_ops = { 878static const struct seq_operations nfqnl_seq_ops = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d9a3bded0d00..a6792089fcf9 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -34,12 +34,20 @@ MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
34 34
35#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 35#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
36 36
37struct compat_delta {
38 struct compat_delta *next;
39 unsigned int offset;
40 short delta;
41};
42
37struct xt_af { 43struct xt_af {
38 struct mutex mutex; 44 struct mutex mutex;
39 struct list_head match; 45 struct list_head match;
40 struct list_head target; 46 struct list_head target;
41 struct list_head tables; 47#ifdef CONFIG_COMPAT
42 struct mutex compat_mutex; 48 struct mutex compat_mutex;
49 struct compat_delta *compat_offsets;
50#endif
43}; 51};
44 52
45static struct xt_af *xt; 53static struct xt_af *xt;
@@ -50,12 +58,6 @@ static struct xt_af *xt;
50#define duprintf(format, args...) 58#define duprintf(format, args...)
51#endif 59#endif
52 60
53enum {
54 TABLE,
55 TARGET,
56 MATCH,
57};
58
59static const char *xt_prefix[NPROTO] = { 61static const char *xt_prefix[NPROTO] = {
60 [AF_INET] = "ip", 62 [AF_INET] = "ip",
61 [AF_INET6] = "ip6", 63 [AF_INET6] = "ip6",
@@ -335,6 +337,54 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
335EXPORT_SYMBOL_GPL(xt_check_match); 337EXPORT_SYMBOL_GPL(xt_check_match);
336 338
337#ifdef CONFIG_COMPAT 339#ifdef CONFIG_COMPAT
340int xt_compat_add_offset(int af, unsigned int offset, short delta)
341{
342 struct compat_delta *tmp;
343
344 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
345 if (!tmp)
346 return -ENOMEM;
347
348 tmp->offset = offset;
349 tmp->delta = delta;
350
351 if (xt[af].compat_offsets) {
352 tmp->next = xt[af].compat_offsets->next;
353 xt[af].compat_offsets->next = tmp;
354 } else {
355 xt[af].compat_offsets = tmp;
356 tmp->next = NULL;
357 }
358 return 0;
359}
360EXPORT_SYMBOL_GPL(xt_compat_add_offset);
361
362void xt_compat_flush_offsets(int af)
363{
364 struct compat_delta *tmp, *next;
365
366 if (xt[af].compat_offsets) {
367 for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
368 next = tmp->next;
369 kfree(tmp);
370 }
371 xt[af].compat_offsets = NULL;
372 }
373}
374EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
375
376short xt_compat_calc_jump(int af, unsigned int offset)
377{
378 struct compat_delta *tmp;
379 short delta;
380
381 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
382 if (tmp->offset < offset)
383 delta += tmp->delta;
384 return delta;
385}
386EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
387
338int xt_compat_match_offset(struct xt_match *match) 388int xt_compat_match_offset(struct xt_match *match)
339{ 389{
340 u_int16_t csize = match->compatsize ? : match->matchsize; 390 u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -342,8 +392,8 @@ int xt_compat_match_offset(struct xt_match *match)
342} 392}
343EXPORT_SYMBOL_GPL(xt_compat_match_offset); 393EXPORT_SYMBOL_GPL(xt_compat_match_offset);
344 394
345void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, 395int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
346 int *size) 396 unsigned int *size)
347{ 397{
348 struct xt_match *match = m->u.kernel.match; 398 struct xt_match *match = m->u.kernel.match;
349 struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; 399 struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
@@ -365,11 +415,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
365 415
366 *size += off; 416 *size += off;
367 *dstptr += msize; 417 *dstptr += msize;
418 return 0;
368} 419}
369EXPORT_SYMBOL_GPL(xt_compat_match_from_user); 420EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
370 421
371int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, 422int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
372 int *size) 423 unsigned int *size)
373{ 424{
374 struct xt_match *match = m->u.kernel.match; 425 struct xt_match *match = m->u.kernel.match;
375 struct compat_xt_entry_match __user *cm = *dstptr; 426 struct compat_xt_entry_match __user *cm = *dstptr;
@@ -377,7 +428,9 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
377 u_int16_t msize = m->u.user.match_size - off; 428 u_int16_t msize = m->u.user.match_size - off;
378 429
379 if (copy_to_user(cm, m, sizeof(*cm)) || 430 if (copy_to_user(cm, m, sizeof(*cm)) ||
380 put_user(msize, &cm->u.user.match_size)) 431 put_user(msize, &cm->u.user.match_size) ||
432 copy_to_user(cm->u.user.name, m->u.kernel.match->name,
433 strlen(m->u.kernel.match->name) + 1))
381 return -EFAULT; 434 return -EFAULT;
382 435
383 if (match->compat_to_user) { 436 if (match->compat_to_user) {
@@ -434,7 +487,7 @@ int xt_compat_target_offset(struct xt_target *target)
434EXPORT_SYMBOL_GPL(xt_compat_target_offset); 487EXPORT_SYMBOL_GPL(xt_compat_target_offset);
435 488
436void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, 489void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
437 int *size) 490 unsigned int *size)
438{ 491{
439 struct xt_target *target = t->u.kernel.target; 492 struct xt_target *target = t->u.kernel.target;
440 struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; 493 struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
@@ -460,7 +513,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
460EXPORT_SYMBOL_GPL(xt_compat_target_from_user); 513EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
461 514
462int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, 515int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
463 int *size) 516 unsigned int *size)
464{ 517{
465 struct xt_target *target = t->u.kernel.target; 518 struct xt_target *target = t->u.kernel.target;
466 struct compat_xt_entry_target __user *ct = *dstptr; 519 struct compat_xt_entry_target __user *ct = *dstptr;
@@ -468,7 +521,9 @@ int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
468 u_int16_t tsize = t->u.user.target_size - off; 521 u_int16_t tsize = t->u.user.target_size - off;
469 522
470 if (copy_to_user(ct, t, sizeof(*ct)) || 523 if (copy_to_user(ct, t, sizeof(*ct)) ||
471 put_user(tsize, &ct->u.user.target_size)) 524 put_user(tsize, &ct->u.user.target_size) ||
525 copy_to_user(ct->u.user.name, t->u.kernel.target->name,
526 strlen(t->u.kernel.target->name) + 1))
472 return -EFAULT; 527 return -EFAULT;
473 528
474 if (target->compat_to_user) { 529 if (target->compat_to_user) {
@@ -495,7 +550,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
495 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) 550 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
496 return NULL; 551 return NULL;
497 552
498 newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL); 553 newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
499 if (!newinfo) 554 if (!newinfo)
500 return NULL; 555 return NULL;
501 556
@@ -535,14 +590,14 @@ void xt_free_table_info(struct xt_table_info *info)
535EXPORT_SYMBOL(xt_free_table_info); 590EXPORT_SYMBOL(xt_free_table_info);
536 591
537/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ 592/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
538struct xt_table *xt_find_table_lock(int af, const char *name) 593struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name)
539{ 594{
540 struct xt_table *t; 595 struct xt_table *t;
541 596
542 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 597 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
543 return ERR_PTR(-EINTR); 598 return ERR_PTR(-EINTR);
544 599
545 list_for_each_entry(t, &xt[af].tables, list) 600 list_for_each_entry(t, &net->xt.tables[af], list)
546 if (strcmp(t->name, name) == 0 && try_module_get(t->me)) 601 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
547 return t; 602 return t;
548 mutex_unlock(&xt[af].mutex); 603 mutex_unlock(&xt[af].mutex);
@@ -598,20 +653,27 @@ xt_replace_table(struct xt_table *table,
598} 653}
599EXPORT_SYMBOL_GPL(xt_replace_table); 654EXPORT_SYMBOL_GPL(xt_replace_table);
600 655
601int xt_register_table(struct xt_table *table, 656struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
602 struct xt_table_info *bootstrap, 657 struct xt_table_info *bootstrap,
603 struct xt_table_info *newinfo) 658 struct xt_table_info *newinfo)
604{ 659{
605 int ret; 660 int ret;
606 struct xt_table_info *private; 661 struct xt_table_info *private;
607 struct xt_table *t; 662 struct xt_table *t;
608 663
664 /* Don't add one object to multiple lists. */
665 table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL);
666 if (!table) {
667 ret = -ENOMEM;
668 goto out;
669 }
670
609 ret = mutex_lock_interruptible(&xt[table->af].mutex); 671 ret = mutex_lock_interruptible(&xt[table->af].mutex);
610 if (ret != 0) 672 if (ret != 0)
611 return ret; 673 goto out_free;
612 674
613 /* Don't autoload: we'd eat our tail... */ 675 /* Don't autoload: we'd eat our tail... */
614 list_for_each_entry(t, &xt[table->af].tables, list) { 676 list_for_each_entry(t, &net->xt.tables[table->af], list) {
615 if (strcmp(t->name, table->name) == 0) { 677 if (strcmp(t->name, table->name) == 0) {
616 ret = -EEXIST; 678 ret = -EEXIST;
617 goto unlock; 679 goto unlock;
@@ -630,12 +692,16 @@ int xt_register_table(struct xt_table *table,
630 /* save number of initial entries */ 692 /* save number of initial entries */
631 private->initial_entries = private->number; 693 private->initial_entries = private->number;
632 694
633 list_add(&table->list, &xt[table->af].tables); 695 list_add(&table->list, &net->xt.tables[table->af]);
696 mutex_unlock(&xt[table->af].mutex);
697 return table;
634 698
635 ret = 0;
636 unlock: 699 unlock:
637 mutex_unlock(&xt[table->af].mutex); 700 mutex_unlock(&xt[table->af].mutex);
638 return ret; 701out_free:
702 kfree(table);
703out:
704 return ERR_PTR(ret);
639} 705}
640EXPORT_SYMBOL_GPL(xt_register_table); 706EXPORT_SYMBOL_GPL(xt_register_table);
641 707
@@ -647,130 +713,204 @@ void *xt_unregister_table(struct xt_table *table)
647 private = table->private; 713 private = table->private;
648 list_del(&table->list); 714 list_del(&table->list);
649 mutex_unlock(&xt[table->af].mutex); 715 mutex_unlock(&xt[table->af].mutex);
716 kfree(table);
650 717
651 return private; 718 return private;
652} 719}
653EXPORT_SYMBOL_GPL(xt_unregister_table); 720EXPORT_SYMBOL_GPL(xt_unregister_table);
654 721
655#ifdef CONFIG_PROC_FS 722#ifdef CONFIG_PROC_FS
656static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos) 723struct xt_names_priv {
724 struct seq_net_private p;
725 int af;
726};
727static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
657{ 728{
658 struct list_head *head = list->next; 729 struct xt_names_priv *priv = seq->private;
730 struct net *net = priv->p.net;
731 int af = priv->af;
659 732
660 if (!head || list_empty(list)) 733 mutex_lock(&xt[af].mutex);
661 return NULL; 734 return seq_list_start(&net->xt.tables[af], *pos);
735}
662 736
663 while (pos && (head = head->next)) { 737static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
664 if (head == list) 738{
665 return NULL; 739 struct xt_names_priv *priv = seq->private;
666 pos--; 740 struct net *net = priv->p.net;
667 } 741 int af = priv->af;
668 return pos ? NULL : head;
669}
670
671static struct list_head *type2list(u_int16_t af, u_int16_t type)
672{
673 struct list_head *list;
674
675 switch (type) {
676 case TARGET:
677 list = &xt[af].target;
678 break;
679 case MATCH:
680 list = &xt[af].match;
681 break;
682 case TABLE:
683 list = &xt[af].tables;
684 break;
685 default:
686 list = NULL;
687 break;
688 }
689 742
690 return list; 743 return seq_list_next(v, &net->xt.tables[af], pos);
691} 744}
692 745
693static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos) 746static void xt_table_seq_stop(struct seq_file *seq, void *v)
694{ 747{
695 struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private; 748 struct xt_names_priv *priv = seq->private;
696 u_int16_t af = (unsigned long)pde->data & 0xffff; 749 int af = priv->af;
697 u_int16_t type = (unsigned long)pde->data >> 16;
698 struct list_head *list;
699 750
700 if (af >= NPROTO) 751 mutex_unlock(&xt[af].mutex);
701 return NULL; 752}
702 753
703 list = type2list(af, type); 754static int xt_table_seq_show(struct seq_file *seq, void *v)
704 if (!list) 755{
705 return NULL; 756 struct xt_table *table = list_entry(v, struct xt_table, list);
706 757
707 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 758 if (strlen(table->name))
708 return NULL; 759 return seq_printf(seq, "%s\n", table->name);
760 else
761 return 0;
762}
709 763
710 return xt_get_idx(list, seq, *pos); 764static const struct seq_operations xt_table_seq_ops = {
765 .start = xt_table_seq_start,
766 .next = xt_table_seq_next,
767 .stop = xt_table_seq_stop,
768 .show = xt_table_seq_show,
769};
770
771static int xt_table_open(struct inode *inode, struct file *file)
772{
773 int ret;
774 struct xt_names_priv *priv;
775
776 ret = seq_open_net(inode, file, &xt_table_seq_ops,
777 sizeof(struct xt_names_priv));
778 if (!ret) {
779 priv = ((struct seq_file *)file->private_data)->private;
780 priv->af = (unsigned long)PDE(inode)->data;
781 }
782 return ret;
711} 783}
712 784
713static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos) 785static const struct file_operations xt_table_ops = {
786 .owner = THIS_MODULE,
787 .open = xt_table_open,
788 .read = seq_read,
789 .llseek = seq_lseek,
790 .release = seq_release,
791};
792
793static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
714{ 794{
715 struct proc_dir_entry *pde = seq->private; 795 struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
716 u_int16_t af = (unsigned long)pde->data & 0xffff; 796 u_int16_t af = (unsigned long)pde->data;
717 u_int16_t type = (unsigned long)pde->data >> 16;
718 struct list_head *list;
719 797
720 if (af >= NPROTO) 798 mutex_lock(&xt[af].mutex);
721 return NULL; 799 return seq_list_start(&xt[af].match, *pos);
800}
722 801
723 list = type2list(af, type); 802static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos)
724 if (!list) 803{
725 return NULL; 804 struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
805 u_int16_t af = (unsigned long)pde->data;
726 806
727 (*pos)++; 807 return seq_list_next(v, &xt[af].match, pos);
728 return xt_get_idx(list, seq, *pos);
729} 808}
730 809
731static void xt_tgt_seq_stop(struct seq_file *seq, void *v) 810static void xt_match_seq_stop(struct seq_file *seq, void *v)
732{ 811{
733 struct proc_dir_entry *pde = seq->private; 812 struct proc_dir_entry *pde = seq->private;
734 u_int16_t af = (unsigned long)pde->data & 0xffff; 813 u_int16_t af = (unsigned long)pde->data;
735 814
736 mutex_unlock(&xt[af].mutex); 815 mutex_unlock(&xt[af].mutex);
737} 816}
738 817
739static int xt_name_seq_show(struct seq_file *seq, void *v) 818static int xt_match_seq_show(struct seq_file *seq, void *v)
740{ 819{
741 char *name = (char *)v + sizeof(struct list_head); 820 struct xt_match *match = list_entry(v, struct xt_match, list);
742 821
743 if (strlen(name)) 822 if (strlen(match->name))
744 return seq_printf(seq, "%s\n", name); 823 return seq_printf(seq, "%s\n", match->name);
745 else 824 else
746 return 0; 825 return 0;
747} 826}
748 827
749static const struct seq_operations xt_tgt_seq_ops = { 828static const struct seq_operations xt_match_seq_ops = {
750 .start = xt_tgt_seq_start, 829 .start = xt_match_seq_start,
751 .next = xt_tgt_seq_next, 830 .next = xt_match_seq_next,
752 .stop = xt_tgt_seq_stop, 831 .stop = xt_match_seq_stop,
753 .show = xt_name_seq_show, 832 .show = xt_match_seq_show,
754}; 833};
755 834
756static int xt_tgt_open(struct inode *inode, struct file *file) 835static int xt_match_open(struct inode *inode, struct file *file)
757{ 836{
758 int ret; 837 int ret;
759 838
760 ret = seq_open(file, &xt_tgt_seq_ops); 839 ret = seq_open(file, &xt_match_seq_ops);
761 if (!ret) { 840 if (!ret) {
762 struct seq_file *seq = file->private_data; 841 struct seq_file *seq = file->private_data;
763 struct proc_dir_entry *pde = PDE(inode);
764 842
765 seq->private = pde; 843 seq->private = PDE(inode);
766 } 844 }
845 return ret;
846}
847
848static const struct file_operations xt_match_ops = {
849 .owner = THIS_MODULE,
850 .open = xt_match_open,
851 .read = seq_read,
852 .llseek = seq_lseek,
853 .release = seq_release,
854};
855
856static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
857{
858 struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
859 u_int16_t af = (unsigned long)pde->data;
860
861 mutex_lock(&xt[af].mutex);
862 return seq_list_start(&xt[af].target, *pos);
863}
864
865static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos)
866{
867 struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
868 u_int16_t af = (unsigned long)pde->data;
869
870 return seq_list_next(v, &xt[af].target, pos);
871}
767 872
873static void xt_target_seq_stop(struct seq_file *seq, void *v)
874{
875 struct proc_dir_entry *pde = seq->private;
876 u_int16_t af = (unsigned long)pde->data;
877
878 mutex_unlock(&xt[af].mutex);
879}
880
881static int xt_target_seq_show(struct seq_file *seq, void *v)
882{
883 struct xt_target *target = list_entry(v, struct xt_target, list);
884
885 if (strlen(target->name))
886 return seq_printf(seq, "%s\n", target->name);
887 else
888 return 0;
889}
890
891static const struct seq_operations xt_target_seq_ops = {
892 .start = xt_target_seq_start,
893 .next = xt_target_seq_next,
894 .stop = xt_target_seq_stop,
895 .show = xt_target_seq_show,
896};
897
898static int xt_target_open(struct inode *inode, struct file *file)
899{
900 int ret;
901
902 ret = seq_open(file, &xt_target_seq_ops);
903 if (!ret) {
904 struct seq_file *seq = file->private_data;
905
906 seq->private = PDE(inode);
907 }
768 return ret; 908 return ret;
769} 909}
770 910
771static const struct file_operations xt_file_ops = { 911static const struct file_operations xt_target_ops = {
772 .owner = THIS_MODULE, 912 .owner = THIS_MODULE,
773 .open = xt_tgt_open, 913 .open = xt_target_open,
774 .read = seq_read, 914 .read = seq_read,
775 .llseek = seq_lseek, 915 .llseek = seq_lseek,
776 .release = seq_release, 916 .release = seq_release,
@@ -782,7 +922,7 @@ static const struct file_operations xt_file_ops = {
782 922
783#endif /* CONFIG_PROC_FS */ 923#endif /* CONFIG_PROC_FS */
784 924
785int xt_proto_init(int af) 925int xt_proto_init(struct net *net, int af)
786{ 926{
787#ifdef CONFIG_PROC_FS 927#ifdef CONFIG_PROC_FS
788 char buf[XT_FUNCTION_MAXNAMELEN]; 928 char buf[XT_FUNCTION_MAXNAMELEN];
@@ -796,25 +936,25 @@ int xt_proto_init(int af)
796#ifdef CONFIG_PROC_FS 936#ifdef CONFIG_PROC_FS
797 strlcpy(buf, xt_prefix[af], sizeof(buf)); 937 strlcpy(buf, xt_prefix[af], sizeof(buf));
798 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 938 strlcat(buf, FORMAT_TABLES, sizeof(buf));
799 proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); 939 proc = proc_net_fops_create(net, buf, 0440, &xt_table_ops);
800 if (!proc) 940 if (!proc)
801 goto out; 941 goto out;
802 proc->data = (void *) ((unsigned long) af | (TABLE << 16)); 942 proc->data = (void *)(unsigned long)af;
803 943
804 944
805 strlcpy(buf, xt_prefix[af], sizeof(buf)); 945 strlcpy(buf, xt_prefix[af], sizeof(buf));
806 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 946 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
807 proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); 947 proc = proc_net_fops_create(net, buf, 0440, &xt_match_ops);
808 if (!proc) 948 if (!proc)
809 goto out_remove_tables; 949 goto out_remove_tables;
810 proc->data = (void *) ((unsigned long) af | (MATCH << 16)); 950 proc->data = (void *)(unsigned long)af;
811 951
812 strlcpy(buf, xt_prefix[af], sizeof(buf)); 952 strlcpy(buf, xt_prefix[af], sizeof(buf));
813 strlcat(buf, FORMAT_TARGETS, sizeof(buf)); 953 strlcat(buf, FORMAT_TARGETS, sizeof(buf));
814 proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); 954 proc = proc_net_fops_create(net, buf, 0440, &xt_target_ops);
815 if (!proc) 955 if (!proc)
816 goto out_remove_matches; 956 goto out_remove_matches;
817 proc->data = (void *) ((unsigned long) af | (TARGET << 16)); 957 proc->data = (void *)(unsigned long)af;
818#endif 958#endif
819 959
820 return 0; 960 return 0;
@@ -823,42 +963,54 @@ int xt_proto_init(int af)
823out_remove_matches: 963out_remove_matches:
824 strlcpy(buf, xt_prefix[af], sizeof(buf)); 964 strlcpy(buf, xt_prefix[af], sizeof(buf));
825 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 965 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
826 proc_net_remove(&init_net, buf); 966 proc_net_remove(net, buf);
827 967
828out_remove_tables: 968out_remove_tables:
829 strlcpy(buf, xt_prefix[af], sizeof(buf)); 969 strlcpy(buf, xt_prefix[af], sizeof(buf));
830 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 970 strlcat(buf, FORMAT_TABLES, sizeof(buf));
831 proc_net_remove(&init_net, buf); 971 proc_net_remove(net, buf);
832out: 972out:
833 return -1; 973 return -1;
834#endif 974#endif
835} 975}
836EXPORT_SYMBOL_GPL(xt_proto_init); 976EXPORT_SYMBOL_GPL(xt_proto_init);
837 977
838void xt_proto_fini(int af) 978void xt_proto_fini(struct net *net, int af)
839{ 979{
840#ifdef CONFIG_PROC_FS 980#ifdef CONFIG_PROC_FS
841 char buf[XT_FUNCTION_MAXNAMELEN]; 981 char buf[XT_FUNCTION_MAXNAMELEN];
842 982
843 strlcpy(buf, xt_prefix[af], sizeof(buf)); 983 strlcpy(buf, xt_prefix[af], sizeof(buf));
844 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 984 strlcat(buf, FORMAT_TABLES, sizeof(buf));
845 proc_net_remove(&init_net, buf); 985 proc_net_remove(net, buf);
846 986
847 strlcpy(buf, xt_prefix[af], sizeof(buf)); 987 strlcpy(buf, xt_prefix[af], sizeof(buf));
848 strlcat(buf, FORMAT_TARGETS, sizeof(buf)); 988 strlcat(buf, FORMAT_TARGETS, sizeof(buf));
849 proc_net_remove(&init_net, buf); 989 proc_net_remove(net, buf);
850 990
851 strlcpy(buf, xt_prefix[af], sizeof(buf)); 991 strlcpy(buf, xt_prefix[af], sizeof(buf));
852 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 992 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
853 proc_net_remove(&init_net, buf); 993 proc_net_remove(net, buf);
854#endif /*CONFIG_PROC_FS*/ 994#endif /*CONFIG_PROC_FS*/
855} 995}
856EXPORT_SYMBOL_GPL(xt_proto_fini); 996EXPORT_SYMBOL_GPL(xt_proto_fini);
857 997
998static int __net_init xt_net_init(struct net *net)
999{
1000 int i;
1001
1002 for (i = 0; i < NPROTO; i++)
1003 INIT_LIST_HEAD(&net->xt.tables[i]);
1004 return 0;
1005}
1006
1007static struct pernet_operations xt_net_ops = {
1008 .init = xt_net_init,
1009};
858 1010
859static int __init xt_init(void) 1011static int __init xt_init(void)
860{ 1012{
861 int i; 1013 int i, rv;
862 1014
863 xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); 1015 xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
864 if (!xt) 1016 if (!xt)
@@ -868,16 +1020,20 @@ static int __init xt_init(void)
868 mutex_init(&xt[i].mutex); 1020 mutex_init(&xt[i].mutex);
869#ifdef CONFIG_COMPAT 1021#ifdef CONFIG_COMPAT
870 mutex_init(&xt[i].compat_mutex); 1022 mutex_init(&xt[i].compat_mutex);
1023 xt[i].compat_offsets = NULL;
871#endif 1024#endif
872 INIT_LIST_HEAD(&xt[i].target); 1025 INIT_LIST_HEAD(&xt[i].target);
873 INIT_LIST_HEAD(&xt[i].match); 1026 INIT_LIST_HEAD(&xt[i].match);
874 INIT_LIST_HEAD(&xt[i].tables);
875 } 1027 }
876 return 0; 1028 rv = register_pernet_subsys(&xt_net_ops);
1029 if (rv < 0)
1030 kfree(xt);
1031 return rv;
877} 1032}
878 1033
879static void __exit xt_fini(void) 1034static void __exit xt_fini(void)
880{ 1035{
1036 unregister_pernet_subsys(&xt_net_ops);
881 kfree(xt); 1037 kfree(xt);
882} 1038}
883 1039
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 77eeae658d42..77a52bf83225 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -22,17 +22,14 @@
22 22
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("iptables qdisc classification target module"); 25MODULE_DESCRIPTION("Xtables: Qdisc classification");
26MODULE_ALIAS("ipt_CLASSIFY"); 26MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 27MODULE_ALIAS("ip6t_CLASSIFY");
28 28
29static unsigned int 29static unsigned int
30target(struct sk_buff *skb, 30classify_tg(struct sk_buff *skb, const struct net_device *in,
31 const struct net_device *in, 31 const struct net_device *out, unsigned int hooknum,
32 const struct net_device *out, 32 const struct xt_target *target, const void *targinfo)
33 unsigned int hooknum,
34 const struct xt_target *target,
35 const void *targinfo)
36{ 33{
37 const struct xt_classify_target_info *clinfo = targinfo; 34 const struct xt_classify_target_info *clinfo = targinfo;
38 35
@@ -40,42 +37,41 @@ target(struct sk_buff *skb,
40 return XT_CONTINUE; 37 return XT_CONTINUE;
41} 38}
42 39
43static struct xt_target xt_classify_target[] __read_mostly = { 40static struct xt_target classify_tg_reg[] __read_mostly = {
44 { 41 {
45 .family = AF_INET, 42 .family = AF_INET,
46 .name = "CLASSIFY", 43 .name = "CLASSIFY",
47 .target = target, 44 .target = classify_tg,
48 .targetsize = sizeof(struct xt_classify_target_info), 45 .targetsize = sizeof(struct xt_classify_target_info),
49 .table = "mangle", 46 .table = "mangle",
50 .hooks = (1 << NF_IP_LOCAL_OUT) | 47 .hooks = (1 << NF_INET_LOCAL_OUT) |
51 (1 << NF_IP_FORWARD) | 48 (1 << NF_INET_FORWARD) |
52 (1 << NF_IP_POST_ROUTING), 49 (1 << NF_INET_POST_ROUTING),
53 .me = THIS_MODULE, 50 .me = THIS_MODULE,
54 }, 51 },
55 { 52 {
56 .name = "CLASSIFY", 53 .name = "CLASSIFY",
57 .family = AF_INET6, 54 .family = AF_INET6,
58 .target = target, 55 .target = classify_tg,
59 .targetsize = sizeof(struct xt_classify_target_info), 56 .targetsize = sizeof(struct xt_classify_target_info),
60 .table = "mangle", 57 .table = "mangle",
61 .hooks = (1 << NF_IP6_LOCAL_OUT) | 58 .hooks = (1 << NF_INET_LOCAL_OUT) |
62 (1 << NF_IP6_FORWARD) | 59 (1 << NF_INET_FORWARD) |
63 (1 << NF_IP6_POST_ROUTING), 60 (1 << NF_INET_POST_ROUTING),
64 .me = THIS_MODULE, 61 .me = THIS_MODULE,
65 }, 62 },
66}; 63};
67 64
68static int __init xt_classify_init(void) 65static int __init classify_tg_init(void)
69{ 66{
70 return xt_register_targets(xt_classify_target, 67 return xt_register_targets(classify_tg_reg,
71 ARRAY_SIZE(xt_classify_target)); 68 ARRAY_SIZE(classify_tg_reg));
72} 69}
73 70
74static void __exit xt_classify_fini(void) 71static void __exit classify_tg_exit(void)
75{ 72{
76 xt_unregister_targets(xt_classify_target, 73 xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
77 ARRAY_SIZE(xt_classify_target));
78} 74}
79 75
80module_init(xt_classify_init); 76module_init(classify_tg_init);
81module_exit(xt_classify_fini); 77module_exit(classify_tg_exit);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 856793e8db7a..5fecfb4794b1 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -1,8 +1,10 @@
1/* This kernel module is used to modify the connection mark values, or 1/*
2 * to optionally restore the skb nfmark from the connection mark 2 * xt_CONNMARK - Netfilter module to modify the connection mark values
3 * 3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> 4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com> 5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
6 * 8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -24,7 +26,7 @@
24#include <net/checksum.h> 26#include <net/checksum.h>
25 27
26MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); 28MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
27MODULE_DESCRIPTION("IP tables CONNMARK matching module"); 29MODULE_DESCRIPTION("Xtables: connection mark modification");
28MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
29MODULE_ALIAS("ipt_CONNMARK"); 31MODULE_ALIAS("ipt_CONNMARK");
30MODULE_ALIAS("ip6t_CONNMARK"); 32MODULE_ALIAS("ip6t_CONNMARK");
@@ -34,12 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
34#include <net/netfilter/nf_conntrack_ecache.h> 36#include <net/netfilter/nf_conntrack_ecache.h>
35 37
36static unsigned int 38static unsigned int
37target(struct sk_buff *skb, 39connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
38 const struct net_device *in, 40 const struct net_device *out, unsigned int hooknum,
39 const struct net_device *out, 41 const struct xt_target *target, const void *targinfo)
40 unsigned int hooknum,
41 const struct xt_target *target,
42 const void *targinfo)
43{ 42{
44 const struct xt_connmark_target_info *markinfo = targinfo; 43 const struct xt_connmark_target_info *markinfo = targinfo;
45 struct nf_conn *ct; 44 struct nf_conn *ct;
@@ -77,20 +76,53 @@ target(struct sk_buff *skb,
77 return XT_CONTINUE; 76 return XT_CONTINUE;
78} 77}
79 78
79static unsigned int
80connmark_tg(struct sk_buff *skb, const struct net_device *in,
81 const struct net_device *out, unsigned int hooknum,
82 const struct xt_target *target, const void *targinfo)
83{
84 const struct xt_connmark_tginfo1 *info = targinfo;
85 enum ip_conntrack_info ctinfo;
86 struct nf_conn *ct;
87 u_int32_t newmark;
88
89 ct = nf_ct_get(skb, &ctinfo);
90 if (ct == NULL)
91 return XT_CONTINUE;
92
93 switch (info->mode) {
94 case XT_CONNMARK_SET:
95 newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
96 if (ct->mark != newmark) {
97 ct->mark = newmark;
98 nf_conntrack_event_cache(IPCT_MARK, skb);
99 }
100 break;
101 case XT_CONNMARK_SAVE:
102 newmark = (ct->mark & ~info->ctmask) ^
103 (skb->mark & info->nfmask);
104 if (ct->mark != newmark) {
105 ct->mark = newmark;
106 nf_conntrack_event_cache(IPCT_MARK, skb);
107 }
108 break;
109 case XT_CONNMARK_RESTORE:
110 newmark = (skb->mark & ~info->nfmask) ^
111 (ct->mark & info->ctmask);
112 skb->mark = newmark;
113 break;
114 }
115
116 return XT_CONTINUE;
117}
118
80static bool 119static bool
81checkentry(const char *tablename, 120connmark_tg_check_v0(const char *tablename, const void *entry,
82 const void *entry, 121 const struct xt_target *target, void *targinfo,
83 const struct xt_target *target, 122 unsigned int hook_mask)
84 void *targinfo,
85 unsigned int hook_mask)
86{ 123{
87 const struct xt_connmark_target_info *matchinfo = targinfo; 124 const struct xt_connmark_target_info *matchinfo = targinfo;
88 125
89 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
90 printk(KERN_WARNING "can't load conntrack support for "
91 "proto=%d\n", target->family);
92 return false;
93 }
94 if (matchinfo->mode == XT_CONNMARK_RESTORE) { 126 if (matchinfo->mode == XT_CONNMARK_RESTORE) {
95 if (strcmp(tablename, "mangle") != 0) { 127 if (strcmp(tablename, "mangle") != 0) {
96 printk(KERN_WARNING "CONNMARK: restore can only be " 128 printk(KERN_WARNING "CONNMARK: restore can only be "
@@ -103,11 +135,29 @@ checkentry(const char *tablename,
103 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); 135 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
104 return false; 136 return false;
105 } 137 }
138 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
139 printk(KERN_WARNING "can't load conntrack support for "
140 "proto=%u\n", target->family);
141 return false;
142 }
143 return true;
144}
145
146static bool
147connmark_tg_check(const char *tablename, const void *entry,
148 const struct xt_target *target, void *targinfo,
149 unsigned int hook_mask)
150{
151 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
152 printk(KERN_WARNING "cannot load conntrack support for "
153 "proto=%u\n", target->family);
154 return false;
155 }
106 return true; 156 return true;
107} 157}
108 158
109static void 159static void
110destroy(const struct xt_target *target, void *targinfo) 160connmark_tg_destroy(const struct xt_target *target, void *targinfo)
111{ 161{
112 nf_ct_l3proto_module_put(target->family); 162 nf_ct_l3proto_module_put(target->family);
113} 163}
@@ -120,7 +170,7 @@ struct compat_xt_connmark_target_info {
120 u_int16_t __pad2; 170 u_int16_t __pad2;
121}; 171};
122 172
123static void compat_from_user(void *dst, void *src) 173static void connmark_tg_compat_from_user_v0(void *dst, void *src)
124{ 174{
125 const struct compat_xt_connmark_target_info *cm = src; 175 const struct compat_xt_connmark_target_info *cm = src;
126 struct xt_connmark_target_info m = { 176 struct xt_connmark_target_info m = {
@@ -131,7 +181,7 @@ static void compat_from_user(void *dst, void *src)
131 memcpy(dst, &m, sizeof(m)); 181 memcpy(dst, &m, sizeof(m));
132} 182}
133 183
134static int compat_to_user(void __user *dst, void *src) 184static int connmark_tg_compat_to_user_v0(void __user *dst, void *src)
135{ 185{
136 const struct xt_connmark_target_info *m = src; 186 const struct xt_connmark_target_info *m = src;
137 struct compat_xt_connmark_target_info cm = { 187 struct compat_xt_connmark_target_info cm = {
@@ -143,43 +193,69 @@ static int compat_to_user(void __user *dst, void *src)
143} 193}
144#endif /* CONFIG_COMPAT */ 194#endif /* CONFIG_COMPAT */
145 195
146static struct xt_target xt_connmark_target[] __read_mostly = { 196static struct xt_target connmark_tg_reg[] __read_mostly = {
147 { 197 {
148 .name = "CONNMARK", 198 .name = "CONNMARK",
199 .revision = 0,
149 .family = AF_INET, 200 .family = AF_INET,
150 .checkentry = checkentry, 201 .checkentry = connmark_tg_check_v0,
151 .destroy = destroy, 202 .destroy = connmark_tg_destroy,
152 .target = target, 203 .target = connmark_tg_v0,
153 .targetsize = sizeof(struct xt_connmark_target_info), 204 .targetsize = sizeof(struct xt_connmark_target_info),
154#ifdef CONFIG_COMPAT 205#ifdef CONFIG_COMPAT
155 .compatsize = sizeof(struct compat_xt_connmark_target_info), 206 .compatsize = sizeof(struct compat_xt_connmark_target_info),
156 .compat_from_user = compat_from_user, 207 .compat_from_user = connmark_tg_compat_from_user_v0,
157 .compat_to_user = compat_to_user, 208 .compat_to_user = connmark_tg_compat_to_user_v0,
158#endif 209#endif
159 .me = THIS_MODULE 210 .me = THIS_MODULE
160 }, 211 },
161 { 212 {
162 .name = "CONNMARK", 213 .name = "CONNMARK",
214 .revision = 0,
163 .family = AF_INET6, 215 .family = AF_INET6,
164 .checkentry = checkentry, 216 .checkentry = connmark_tg_check_v0,
165 .destroy = destroy, 217 .destroy = connmark_tg_destroy,
166 .target = target, 218 .target = connmark_tg_v0,
167 .targetsize = sizeof(struct xt_connmark_target_info), 219 .targetsize = sizeof(struct xt_connmark_target_info),
220#ifdef CONFIG_COMPAT
221 .compatsize = sizeof(struct compat_xt_connmark_target_info),
222 .compat_from_user = connmark_tg_compat_from_user_v0,
223 .compat_to_user = connmark_tg_compat_to_user_v0,
224#endif
168 .me = THIS_MODULE 225 .me = THIS_MODULE
169 }, 226 },
227 {
228 .name = "CONNMARK",
229 .revision = 1,
230 .family = AF_INET,
231 .checkentry = connmark_tg_check,
232 .target = connmark_tg,
233 .targetsize = sizeof(struct xt_connmark_tginfo1),
234 .destroy = connmark_tg_destroy,
235 .me = THIS_MODULE,
236 },
237 {
238 .name = "CONNMARK",
239 .revision = 1,
240 .family = AF_INET6,
241 .checkentry = connmark_tg_check,
242 .target = connmark_tg,
243 .targetsize = sizeof(struct xt_connmark_tginfo1),
244 .destroy = connmark_tg_destroy,
245 .me = THIS_MODULE,
246 },
170}; 247};
171 248
172static int __init xt_connmark_init(void) 249static int __init connmark_tg_init(void)
173{ 250{
174 return xt_register_targets(xt_connmark_target, 251 return xt_register_targets(connmark_tg_reg,
175 ARRAY_SIZE(xt_connmark_target)); 252 ARRAY_SIZE(connmark_tg_reg));
176} 253}
177 254
178static void __exit xt_connmark_fini(void) 255static void __exit connmark_tg_exit(void)
179{ 256{
180 xt_unregister_targets(xt_connmark_target, 257 xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
181 ARRAY_SIZE(xt_connmark_target));
182} 258}
183 259
184module_init(xt_connmark_init); 260module_init(connmark_tg_init);
185module_exit(xt_connmark_fini); 261module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 021b5c8d20e2..1faa9136195d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -20,12 +20,13 @@
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CONNSECMARK.h> 21#include <linux/netfilter/xt_CONNSECMARK.h>
22#include <net/netfilter/nf_conntrack.h> 22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_ecache.h>
23 24
24#define PFX "CONNSECMARK: " 25#define PFX "CONNSECMARK: "
25 26
26MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
27MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); 28MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
28MODULE_DESCRIPTION("ip[6]tables CONNSECMARK module"); 29MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
29MODULE_ALIAS("ipt_CONNSECMARK"); 30MODULE_ALIAS("ipt_CONNSECMARK");
30MODULE_ALIAS("ip6t_CONNSECMARK"); 31MODULE_ALIAS("ip6t_CONNSECMARK");
31 32
@@ -40,8 +41,10 @@ static void secmark_save(const struct sk_buff *skb)
40 enum ip_conntrack_info ctinfo; 41 enum ip_conntrack_info ctinfo;
41 42
42 ct = nf_ct_get(skb, &ctinfo); 43 ct = nf_ct_get(skb, &ctinfo);
43 if (ct && !ct->secmark) 44 if (ct && !ct->secmark) {
44 ct->secmark = skb->secmark; 45 ct->secmark = skb->secmark;
46 nf_conntrack_event_cache(IPCT_SECMARK, skb);
47 }
45 } 48 }
46} 49}
47 50
@@ -61,10 +64,10 @@ static void secmark_restore(struct sk_buff *skb)
61 } 64 }
62} 65}
63 66
64static unsigned int target(struct sk_buff *skb, const struct net_device *in, 67static unsigned int
65 const struct net_device *out, unsigned int hooknum, 68connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
66 const struct xt_target *target, 69 const struct net_device *out, unsigned int hooknum,
67 const void *targinfo) 70 const struct xt_target *target, const void *targinfo)
68{ 71{
69 const struct xt_connsecmark_target_info *info = targinfo; 72 const struct xt_connsecmark_target_info *info = targinfo;
70 73
@@ -84,17 +87,13 @@ static unsigned int target(struct sk_buff *skb, const struct net_device *in,
84 return XT_CONTINUE; 87 return XT_CONTINUE;
85} 88}
86 89
87static bool checkentry(const char *tablename, const void *entry, 90static bool
88 const struct xt_target *target, void *targinfo, 91connsecmark_tg_check(const char *tablename, const void *entry,
89 unsigned int hook_mask) 92 const struct xt_target *target, void *targinfo,
93 unsigned int hook_mask)
90{ 94{
91 const struct xt_connsecmark_target_info *info = targinfo; 95 const struct xt_connsecmark_target_info *info = targinfo;
92 96
93 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
94 printk(KERN_WARNING "can't load conntrack support for "
95 "proto=%d\n", target->family);
96 return false;
97 }
98 switch (info->mode) { 97 switch (info->mode) {
99 case CONNSECMARK_SAVE: 98 case CONNSECMARK_SAVE:
100 case CONNSECMARK_RESTORE: 99 case CONNSECMARK_RESTORE:
@@ -105,22 +104,27 @@ static bool checkentry(const char *tablename, const void *entry,
105 return false; 104 return false;
106 } 105 }
107 106
107 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
108 printk(KERN_WARNING "can't load conntrack support for "
109 "proto=%u\n", target->family);
110 return false;
111 }
108 return true; 112 return true;
109} 113}
110 114
111static void 115static void
112destroy(const struct xt_target *target, void *targinfo) 116connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
113{ 117{
114 nf_ct_l3proto_module_put(target->family); 118 nf_ct_l3proto_module_put(target->family);
115} 119}
116 120
117static struct xt_target xt_connsecmark_target[] __read_mostly = { 121static struct xt_target connsecmark_tg_reg[] __read_mostly = {
118 { 122 {
119 .name = "CONNSECMARK", 123 .name = "CONNSECMARK",
120 .family = AF_INET, 124 .family = AF_INET,
121 .checkentry = checkentry, 125 .checkentry = connsecmark_tg_check,
122 .destroy = destroy, 126 .destroy = connsecmark_tg_destroy,
123 .target = target, 127 .target = connsecmark_tg,
124 .targetsize = sizeof(struct xt_connsecmark_target_info), 128 .targetsize = sizeof(struct xt_connsecmark_target_info),
125 .table = "mangle", 129 .table = "mangle",
126 .me = THIS_MODULE, 130 .me = THIS_MODULE,
@@ -128,26 +132,26 @@ static struct xt_target xt_connsecmark_target[] __read_mostly = {
128 { 132 {
129 .name = "CONNSECMARK", 133 .name = "CONNSECMARK",
130 .family = AF_INET6, 134 .family = AF_INET6,
131 .checkentry = checkentry, 135 .checkentry = connsecmark_tg_check,
132 .destroy = destroy, 136 .destroy = connsecmark_tg_destroy,
133 .target = target, 137 .target = connsecmark_tg,
134 .targetsize = sizeof(struct xt_connsecmark_target_info), 138 .targetsize = sizeof(struct xt_connsecmark_target_info),
135 .table = "mangle", 139 .table = "mangle",
136 .me = THIS_MODULE, 140 .me = THIS_MODULE,
137 }, 141 },
138}; 142};
139 143
140static int __init xt_connsecmark_init(void) 144static int __init connsecmark_tg_init(void)
141{ 145{
142 return xt_register_targets(xt_connsecmark_target, 146 return xt_register_targets(connsecmark_tg_reg,
143 ARRAY_SIZE(xt_connsecmark_target)); 147 ARRAY_SIZE(connsecmark_tg_reg));
144} 148}
145 149
146static void __exit xt_connsecmark_fini(void) 150static void __exit connsecmark_tg_exit(void)
147{ 151{
148 xt_unregister_targets(xt_connsecmark_target, 152 xt_unregister_targets(connsecmark_tg_reg,
149 ARRAY_SIZE(xt_connsecmark_target)); 153 ARRAY_SIZE(connsecmark_tg_reg));
150} 154}
151 155
152module_init(xt_connsecmark_init); 156module_init(connsecmark_tg_init);
153module_exit(xt_connsecmark_fini); 157module_exit(connsecmark_tg_exit);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 6322a933ab71..97efd74c04fe 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -18,19 +18,20 @@
18 18
19#include <linux/netfilter/x_tables.h> 19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_DSCP.h> 20#include <linux/netfilter/xt_DSCP.h>
21#include <linux/netfilter_ipv4/ipt_TOS.h>
21 22
22MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
23MODULE_DESCRIPTION("x_tables DSCP modification module"); 24MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
24MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
25MODULE_ALIAS("ipt_DSCP"); 26MODULE_ALIAS("ipt_DSCP");
26MODULE_ALIAS("ip6t_DSCP"); 27MODULE_ALIAS("ip6t_DSCP");
28MODULE_ALIAS("ipt_TOS");
29MODULE_ALIAS("ip6t_TOS");
27 30
28static unsigned int target(struct sk_buff *skb, 31static unsigned int
29 const struct net_device *in, 32dscp_tg(struct sk_buff *skb, const struct net_device *in,
30 const struct net_device *out, 33 const struct net_device *out, unsigned int hooknum,
31 unsigned int hooknum, 34 const struct xt_target *target, const void *targinfo)
32 const struct xt_target *target,
33 const void *targinfo)
34{ 35{
35 const struct xt_DSCP_info *dinfo = targinfo; 36 const struct xt_DSCP_info *dinfo = targinfo;
36 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 37 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -46,12 +47,10 @@ static unsigned int target(struct sk_buff *skb,
46 return XT_CONTINUE; 47 return XT_CONTINUE;
47} 48}
48 49
49static unsigned int target6(struct sk_buff *skb, 50static unsigned int
50 const struct net_device *in, 51dscp_tg6(struct sk_buff *skb, const struct net_device *in,
51 const struct net_device *out, 52 const struct net_device *out, unsigned int hooknum,
52 unsigned int hooknum, 53 const struct xt_target *target, const void *targinfo)
53 const struct xt_target *target,
54 const void *targinfo)
55{ 54{
56 const struct xt_DSCP_info *dinfo = targinfo; 55 const struct xt_DSCP_info *dinfo = targinfo;
57 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 56 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -66,11 +65,10 @@ static unsigned int target6(struct sk_buff *skb,
66 return XT_CONTINUE; 65 return XT_CONTINUE;
67} 66}
68 67
69static bool checkentry(const char *tablename, 68static bool
70 const void *e_void, 69dscp_tg_check(const char *tablename, const void *e_void,
71 const struct xt_target *target, 70 const struct xt_target *target, void *targinfo,
72 void *targinfo, 71 unsigned int hook_mask)
73 unsigned int hook_mask)
74{ 72{
75 const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; 73 const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
76 74
@@ -81,12 +79,95 @@ static bool checkentry(const char *tablename,
81 return true; 79 return true;
82} 80}
83 81
84static struct xt_target xt_dscp_target[] __read_mostly = { 82static unsigned int
83tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
84 const struct net_device *out, unsigned int hooknum,
85 const struct xt_target *target, const void *targinfo)
86{
87 const struct ipt_tos_target_info *info = targinfo;
88 struct iphdr *iph = ip_hdr(skb);
89 u_int8_t oldtos;
90
91 if ((iph->tos & IPTOS_TOS_MASK) != info->tos) {
92 if (!skb_make_writable(skb, sizeof(struct iphdr)))
93 return NF_DROP;
94
95 iph = ip_hdr(skb);
96 oldtos = iph->tos;
97 iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos;
98 csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
99 }
100
101 return XT_CONTINUE;
102}
103
104static bool
105tos_tg_check_v0(const char *tablename, const void *e_void,
106 const struct xt_target *target, void *targinfo,
107 unsigned int hook_mask)
108{
109 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
110
111 if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
112 tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
113 tos != IPTOS_NORMALSVC) {
114 printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
115 return false;
116 }
117
118 return true;
119}
120
121static unsigned int
122tos_tg(struct sk_buff *skb, const struct net_device *in,
123 const struct net_device *out, unsigned int hooknum,
124 const struct xt_target *target, const void *targinfo)
125{
126 const struct xt_tos_target_info *info = targinfo;
127 struct iphdr *iph = ip_hdr(skb);
128 u_int8_t orig, nv;
129
130 orig = ipv4_get_dsfield(iph);
131 nv = (orig & ~info->tos_mask) ^ info->tos_value;
132
133 if (orig != nv) {
134 if (!skb_make_writable(skb, sizeof(struct iphdr)))
135 return NF_DROP;
136 iph = ip_hdr(skb);
137 ipv4_change_dsfield(iph, 0, nv);
138 }
139
140 return XT_CONTINUE;
141}
142
143static unsigned int
144tos_tg6(struct sk_buff *skb, const struct net_device *in,
145 const struct net_device *out, unsigned int hooknum,
146 const struct xt_target *target, const void *targinfo)
147{
148 const struct xt_tos_target_info *info = targinfo;
149 struct ipv6hdr *iph = ipv6_hdr(skb);
150 u_int8_t orig, nv;
151
152 orig = ipv6_get_dsfield(iph);
153 nv = (orig & info->tos_mask) ^ info->tos_value;
154
155 if (orig != nv) {
156 if (!skb_make_writable(skb, sizeof(struct iphdr)))
157 return NF_DROP;
158 iph = ipv6_hdr(skb);
159 ipv6_change_dsfield(iph, 0, nv);
160 }
161
162 return XT_CONTINUE;
163}
164
165static struct xt_target dscp_tg_reg[] __read_mostly = {
85 { 166 {
86 .name = "DSCP", 167 .name = "DSCP",
87 .family = AF_INET, 168 .family = AF_INET,
88 .checkentry = checkentry, 169 .checkentry = dscp_tg_check,
89 .target = target, 170 .target = dscp_tg,
90 .targetsize = sizeof(struct xt_DSCP_info), 171 .targetsize = sizeof(struct xt_DSCP_info),
91 .table = "mangle", 172 .table = "mangle",
92 .me = THIS_MODULE, 173 .me = THIS_MODULE,
@@ -94,23 +175,51 @@ static struct xt_target xt_dscp_target[] __read_mostly = {
94 { 175 {
95 .name = "DSCP", 176 .name = "DSCP",
96 .family = AF_INET6, 177 .family = AF_INET6,
97 .checkentry = checkentry, 178 .checkentry = dscp_tg_check,
98 .target = target6, 179 .target = dscp_tg6,
99 .targetsize = sizeof(struct xt_DSCP_info), 180 .targetsize = sizeof(struct xt_DSCP_info),
100 .table = "mangle", 181 .table = "mangle",
101 .me = THIS_MODULE, 182 .me = THIS_MODULE,
102 }, 183 },
184 {
185 .name = "TOS",
186 .revision = 0,
187 .family = AF_INET,
188 .table = "mangle",
189 .target = tos_tg_v0,
190 .targetsize = sizeof(struct ipt_tos_target_info),
191 .checkentry = tos_tg_check_v0,
192 .me = THIS_MODULE,
193 },
194 {
195 .name = "TOS",
196 .revision = 1,
197 .family = AF_INET,
198 .table = "mangle",
199 .target = tos_tg,
200 .targetsize = sizeof(struct xt_tos_target_info),
201 .me = THIS_MODULE,
202 },
203 {
204 .name = "TOS",
205 .revision = 1,
206 .family = AF_INET6,
207 .table = "mangle",
208 .target = tos_tg6,
209 .targetsize = sizeof(struct xt_tos_target_info),
210 .me = THIS_MODULE,
211 },
103}; 212};
104 213
105static int __init xt_dscp_target_init(void) 214static int __init dscp_tg_init(void)
106{ 215{
107 return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); 216 return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
108} 217}
109 218
110static void __exit xt_dscp_target_fini(void) 219static void __exit dscp_tg_exit(void)
111{ 220{
112 xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); 221 xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
113} 222}
114 223
115module_init(xt_dscp_target_init); 224module_init(dscp_tg_init);
116module_exit(xt_dscp_target_fini); 225module_exit(dscp_tg_exit);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index bc6503d77d75..f9ce20b58981 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -1,10 +1,13 @@
1/* This is a module which is used for setting the NFMARK field of an skb. */ 1/*
2 2 * xt_MARK - Netfilter module to modify the NFMARK field of an skb
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> 3 *
4 * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
5 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * Jan Engelhardt <jengelh@computergmbh.de>
4 * 7 *
5 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
8 */ 11 */
9 12
10#include <linux/module.h> 13#include <linux/module.h>
@@ -17,17 +20,14 @@
17 20
18MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
20MODULE_DESCRIPTION("ip[6]tables MARK modification module"); 23MODULE_DESCRIPTION("Xtables: packet mark modification");
21MODULE_ALIAS("ipt_MARK"); 24MODULE_ALIAS("ipt_MARK");
22MODULE_ALIAS("ip6t_MARK"); 25MODULE_ALIAS("ip6t_MARK");
23 26
24static unsigned int 27static unsigned int
25target_v0(struct sk_buff *skb, 28mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
26 const struct net_device *in, 29 const struct net_device *out, unsigned int hooknum,
27 const struct net_device *out, 30 const struct xt_target *target, const void *targinfo)
28 unsigned int hooknum,
29 const struct xt_target *target,
30 const void *targinfo)
31{ 31{
32 const struct xt_mark_target_info *markinfo = targinfo; 32 const struct xt_mark_target_info *markinfo = targinfo;
33 33
@@ -36,12 +36,9 @@ target_v0(struct sk_buff *skb,
36} 36}
37 37
38static unsigned int 38static unsigned int
39target_v1(struct sk_buff *skb, 39mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
40 const struct net_device *in, 40 const struct net_device *out, unsigned int hooknum,
41 const struct net_device *out, 41 const struct xt_target *target, const void *targinfo)
42 unsigned int hooknum,
43 const struct xt_target *target,
44 const void *targinfo)
45{ 42{
46 const struct xt_mark_target_info_v1 *markinfo = targinfo; 43 const struct xt_mark_target_info_v1 *markinfo = targinfo;
47 int mark = 0; 44 int mark = 0;
@@ -64,13 +61,21 @@ target_v1(struct sk_buff *skb,
64 return XT_CONTINUE; 61 return XT_CONTINUE;
65} 62}
66 63
64static unsigned int
65mark_tg(struct sk_buff *skb, const struct net_device *in,
66 const struct net_device *out, unsigned int hooknum,
67 const struct xt_target *target, const void *targinfo)
68{
69 const struct xt_mark_tginfo2 *info = targinfo;
70
71 skb->mark = (skb->mark & ~info->mask) ^ info->mark;
72 return XT_CONTINUE;
73}
67 74
68static bool 75static bool
69checkentry_v0(const char *tablename, 76mark_tg_check_v0(const char *tablename, const void *entry,
70 const void *entry, 77 const struct xt_target *target, void *targinfo,
71 const struct xt_target *target, 78 unsigned int hook_mask)
72 void *targinfo,
73 unsigned int hook_mask)
74{ 79{
75 const struct xt_mark_target_info *markinfo = targinfo; 80 const struct xt_mark_target_info *markinfo = targinfo;
76 81
@@ -82,11 +87,9 @@ checkentry_v0(const char *tablename,
82} 87}
83 88
84static bool 89static bool
85checkentry_v1(const char *tablename, 90mark_tg_check_v1(const char *tablename, const void *entry,
86 const void *entry, 91 const struct xt_target *target, void *targinfo,
87 const struct xt_target *target, 92 unsigned int hook_mask)
88 void *targinfo,
89 unsigned int hook_mask)
90{ 93{
91 const struct xt_mark_target_info_v1 *markinfo = targinfo; 94 const struct xt_mark_target_info_v1 *markinfo = targinfo;
92 95
@@ -105,6 +108,28 @@ checkentry_v1(const char *tablename,
105} 108}
106 109
107#ifdef CONFIG_COMPAT 110#ifdef CONFIG_COMPAT
111struct compat_xt_mark_target_info {
112 compat_ulong_t mark;
113};
114
115static void mark_tg_compat_from_user_v0(void *dst, void *src)
116{
117 const struct compat_xt_mark_target_info *cm = src;
118 struct xt_mark_target_info m = {
119 .mark = cm->mark,
120 };
121 memcpy(dst, &m, sizeof(m));
122}
123
124static int mark_tg_compat_to_user_v0(void __user *dst, void *src)
125{
126 const struct xt_mark_target_info *m = src;
127 struct compat_xt_mark_target_info cm = {
128 .mark = m->mark,
129 };
130 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
131}
132
108struct compat_xt_mark_target_info_v1 { 133struct compat_xt_mark_target_info_v1 {
109 compat_ulong_t mark; 134 compat_ulong_t mark;
110 u_int8_t mode; 135 u_int8_t mode;
@@ -112,7 +137,7 @@ struct compat_xt_mark_target_info_v1 {
112 u_int16_t __pad2; 137 u_int16_t __pad2;
113}; 138};
114 139
115static void compat_from_user_v1(void *dst, void *src) 140static void mark_tg_compat_from_user_v1(void *dst, void *src)
116{ 141{
117 const struct compat_xt_mark_target_info_v1 *cm = src; 142 const struct compat_xt_mark_target_info_v1 *cm = src;
118 struct xt_mark_target_info_v1 m = { 143 struct xt_mark_target_info_v1 m = {
@@ -122,7 +147,7 @@ static void compat_from_user_v1(void *dst, void *src)
122 memcpy(dst, &m, sizeof(m)); 147 memcpy(dst, &m, sizeof(m));
123} 148}
124 149
125static int compat_to_user_v1(void __user *dst, void *src) 150static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
126{ 151{
127 const struct xt_mark_target_info_v1 *m = src; 152 const struct xt_mark_target_info_v1 *m = src;
128 struct compat_xt_mark_target_info_v1 cm = { 153 struct compat_xt_mark_target_info_v1 cm = {
@@ -133,14 +158,19 @@ static int compat_to_user_v1(void __user *dst, void *src)
133} 158}
134#endif /* CONFIG_COMPAT */ 159#endif /* CONFIG_COMPAT */
135 160
136static struct xt_target xt_mark_target[] __read_mostly = { 161static struct xt_target mark_tg_reg[] __read_mostly = {
137 { 162 {
138 .name = "MARK", 163 .name = "MARK",
139 .family = AF_INET, 164 .family = AF_INET,
140 .revision = 0, 165 .revision = 0,
141 .checkentry = checkentry_v0, 166 .checkentry = mark_tg_check_v0,
142 .target = target_v0, 167 .target = mark_tg_v0,
143 .targetsize = sizeof(struct xt_mark_target_info), 168 .targetsize = sizeof(struct xt_mark_target_info),
169#ifdef CONFIG_COMPAT
170 .compatsize = sizeof(struct compat_xt_mark_target_info),
171 .compat_from_user = mark_tg_compat_from_user_v0,
172 .compat_to_user = mark_tg_compat_to_user_v0,
173#endif
144 .table = "mangle", 174 .table = "mangle",
145 .me = THIS_MODULE, 175 .me = THIS_MODULE,
146 }, 176 },
@@ -148,13 +178,13 @@ static struct xt_target xt_mark_target[] __read_mostly = {
148 .name = "MARK", 178 .name = "MARK",
149 .family = AF_INET, 179 .family = AF_INET,
150 .revision = 1, 180 .revision = 1,
151 .checkentry = checkentry_v1, 181 .checkentry = mark_tg_check_v1,
152 .target = target_v1, 182 .target = mark_tg_v1,
153 .targetsize = sizeof(struct xt_mark_target_info_v1), 183 .targetsize = sizeof(struct xt_mark_target_info_v1),
154#ifdef CONFIG_COMPAT 184#ifdef CONFIG_COMPAT
155 .compatsize = sizeof(struct compat_xt_mark_target_info_v1), 185 .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
156 .compat_from_user = compat_from_user_v1, 186 .compat_from_user = mark_tg_compat_from_user_v1,
157 .compat_to_user = compat_to_user_v1, 187 .compat_to_user = mark_tg_compat_to_user_v1,
158#endif 188#endif
159 .table = "mangle", 189 .table = "mangle",
160 .me = THIS_MODULE, 190 .me = THIS_MODULE,
@@ -163,23 +193,59 @@ static struct xt_target xt_mark_target[] __read_mostly = {
163 .name = "MARK", 193 .name = "MARK",
164 .family = AF_INET6, 194 .family = AF_INET6,
165 .revision = 0, 195 .revision = 0,
166 .checkentry = checkentry_v0, 196 .checkentry = mark_tg_check_v0,
167 .target = target_v0, 197 .target = mark_tg_v0,
168 .targetsize = sizeof(struct xt_mark_target_info), 198 .targetsize = sizeof(struct xt_mark_target_info),
199#ifdef CONFIG_COMPAT
200 .compatsize = sizeof(struct compat_xt_mark_target_info),
201 .compat_from_user = mark_tg_compat_from_user_v0,
202 .compat_to_user = mark_tg_compat_to_user_v0,
203#endif
204 .table = "mangle",
205 .me = THIS_MODULE,
206 },
207 {
208 .name = "MARK",
209 .family = AF_INET6,
210 .revision = 1,
211 .checkentry = mark_tg_check_v1,
212 .target = mark_tg_v1,
213 .targetsize = sizeof(struct xt_mark_target_info_v1),
214#ifdef CONFIG_COMPAT
215 .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
216 .compat_from_user = mark_tg_compat_from_user_v1,
217 .compat_to_user = mark_tg_compat_to_user_v1,
218#endif
169 .table = "mangle", 219 .table = "mangle",
170 .me = THIS_MODULE, 220 .me = THIS_MODULE,
171 }, 221 },
222 {
223 .name = "MARK",
224 .revision = 2,
225 .family = AF_INET,
226 .target = mark_tg,
227 .targetsize = sizeof(struct xt_mark_tginfo2),
228 .me = THIS_MODULE,
229 },
230 {
231 .name = "MARK",
232 .revision = 2,
233 .family = AF_INET6,
234 .target = mark_tg,
235 .targetsize = sizeof(struct xt_mark_tginfo2),
236 .me = THIS_MODULE,
237 },
172}; 238};
173 239
174static int __init xt_mark_init(void) 240static int __init mark_tg_init(void)
175{ 241{
176 return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); 242 return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
177} 243}
178 244
179static void __exit xt_mark_fini(void) 245static void __exit mark_tg_exit(void)
180{ 246{
181 xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); 247 xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
182} 248}
183 249
184module_init(xt_mark_init); 250module_init(mark_tg_init);
185module_exit(xt_mark_fini); 251module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 9fb449ffbf8b..19ae8efae655 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -12,18 +12,18 @@
12 12
13#include <linux/netfilter/x_tables.h> 13#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter/xt_NFLOG.h> 14#include <linux/netfilter/xt_NFLOG.h>
15#include <net/netfilter/nf_log.h>
15 16
16MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 17MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
17MODULE_DESCRIPTION("x_tables NFLOG target"); 18MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
18MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
19MODULE_ALIAS("ipt_NFLOG"); 20MODULE_ALIAS("ipt_NFLOG");
20MODULE_ALIAS("ip6t_NFLOG"); 21MODULE_ALIAS("ip6t_NFLOG");
21 22
22static unsigned int 23static unsigned int
23nflog_target(struct sk_buff *skb, 24nflog_tg(struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, const struct net_device *out, 25 const struct net_device *out, unsigned int hooknum,
25 unsigned int hooknum, const struct xt_target *target, 26 const struct xt_target *target, const void *targinfo)
26 const void *targinfo)
27{ 27{
28 const struct xt_nflog_info *info = targinfo; 28 const struct xt_nflog_info *info = targinfo;
29 struct nf_loginfo li; 29 struct nf_loginfo li;
@@ -39,9 +39,9 @@ nflog_target(struct sk_buff *skb,
39} 39}
40 40
41static bool 41static bool
42nflog_checkentry(const char *tablename, const void *entry, 42nflog_tg_check(const char *tablename, const void *entry,
43 const struct xt_target *target, void *targetinfo, 43 const struct xt_target *target, void *targetinfo,
44 unsigned int hookmask) 44 unsigned int hookmask)
45{ 45{
46 const struct xt_nflog_info *info = targetinfo; 46 const struct xt_nflog_info *info = targetinfo;
47 47
@@ -52,35 +52,34 @@ nflog_checkentry(const char *tablename, const void *entry,
52 return true; 52 return true;
53} 53}
54 54
55static struct xt_target xt_nflog_target[] __read_mostly = { 55static struct xt_target nflog_tg_reg[] __read_mostly = {
56 { 56 {
57 .name = "NFLOG", 57 .name = "NFLOG",
58 .family = AF_INET, 58 .family = AF_INET,
59 .checkentry = nflog_checkentry, 59 .checkentry = nflog_tg_check,
60 .target = nflog_target, 60 .target = nflog_tg,
61 .targetsize = sizeof(struct xt_nflog_info), 61 .targetsize = sizeof(struct xt_nflog_info),
62 .me = THIS_MODULE, 62 .me = THIS_MODULE,
63 }, 63 },
64 { 64 {
65 .name = "NFLOG", 65 .name = "NFLOG",
66 .family = AF_INET6, 66 .family = AF_INET6,
67 .checkentry = nflog_checkentry, 67 .checkentry = nflog_tg_check,
68 .target = nflog_target, 68 .target = nflog_tg,
69 .targetsize = sizeof(struct xt_nflog_info), 69 .targetsize = sizeof(struct xt_nflog_info),
70 .me = THIS_MODULE, 70 .me = THIS_MODULE,
71 }, 71 },
72}; 72};
73 73
74static int __init xt_nflog_init(void) 74static int __init nflog_tg_init(void)
75{ 75{
76 return xt_register_targets(xt_nflog_target, 76 return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
77 ARRAY_SIZE(xt_nflog_target));
78} 77}
79 78
80static void __exit xt_nflog_fini(void) 79static void __exit nflog_tg_exit(void)
81{ 80{
82 xt_unregister_targets(xt_nflog_target, ARRAY_SIZE(xt_nflog_target)); 81 xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
83} 82}
84 83
85module_init(xt_nflog_init); 84module_init(nflog_tg_init);
86module_exit(xt_nflog_fini); 85module_exit(nflog_tg_exit);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index c3984e9f766a..beb24d19a56f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -17,59 +17,55 @@
17#include <linux/netfilter/xt_NFQUEUE.h> 17#include <linux/netfilter/xt_NFQUEUE.h>
18 18
19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
20MODULE_DESCRIPTION("[ip,ip6,arp]_tables NFQUEUE target"); 20MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_ALIAS("ipt_NFQUEUE"); 22MODULE_ALIAS("ipt_NFQUEUE");
23MODULE_ALIAS("ip6t_NFQUEUE"); 23MODULE_ALIAS("ip6t_NFQUEUE");
24MODULE_ALIAS("arpt_NFQUEUE"); 24MODULE_ALIAS("arpt_NFQUEUE");
25 25
26static unsigned int 26static unsigned int
27target(struct sk_buff *skb, 27nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
28 const struct net_device *in, 28 const struct net_device *out, unsigned int hooknum,
29 const struct net_device *out, 29 const struct xt_target *target, const void *targinfo)
30 unsigned int hooknum,
31 const struct xt_target *target,
32 const void *targinfo)
33{ 30{
34 const struct xt_NFQ_info *tinfo = targinfo; 31 const struct xt_NFQ_info *tinfo = targinfo;
35 32
36 return NF_QUEUE_NR(tinfo->queuenum); 33 return NF_QUEUE_NR(tinfo->queuenum);
37} 34}
38 35
39static struct xt_target xt_nfqueue_target[] __read_mostly = { 36static struct xt_target nfqueue_tg_reg[] __read_mostly = {
40 { 37 {
41 .name = "NFQUEUE", 38 .name = "NFQUEUE",
42 .family = AF_INET, 39 .family = AF_INET,
43 .target = target, 40 .target = nfqueue_tg,
44 .targetsize = sizeof(struct xt_NFQ_info), 41 .targetsize = sizeof(struct xt_NFQ_info),
45 .me = THIS_MODULE, 42 .me = THIS_MODULE,
46 }, 43 },
47 { 44 {
48 .name = "NFQUEUE", 45 .name = "NFQUEUE",
49 .family = AF_INET6, 46 .family = AF_INET6,
50 .target = target, 47 .target = nfqueue_tg,
51 .targetsize = sizeof(struct xt_NFQ_info), 48 .targetsize = sizeof(struct xt_NFQ_info),
52 .me = THIS_MODULE, 49 .me = THIS_MODULE,
53 }, 50 },
54 { 51 {
55 .name = "NFQUEUE", 52 .name = "NFQUEUE",
56 .family = NF_ARP, 53 .family = NF_ARP,
57 .target = target, 54 .target = nfqueue_tg,
58 .targetsize = sizeof(struct xt_NFQ_info), 55 .targetsize = sizeof(struct xt_NFQ_info),
59 .me = THIS_MODULE, 56 .me = THIS_MODULE,
60 }, 57 },
61}; 58};
62 59
63static int __init xt_nfqueue_init(void) 60static int __init nfqueue_tg_init(void)
64{ 61{
65 return xt_register_targets(xt_nfqueue_target, 62 return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
66 ARRAY_SIZE(xt_nfqueue_target));
67} 63}
68 64
69static void __exit xt_nfqueue_fini(void) 65static void __exit nfqueue_tg_exit(void)
70{ 66{
71 xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); 67 xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
72} 68}
73 69
74module_init(xt_nfqueue_init); 70module_init(nfqueue_tg_init);
75module_exit(xt_nfqueue_fini); 71module_exit(nfqueue_tg_exit);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 4976ce186615..6c9de611eb8d 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -7,17 +7,15 @@
7#include <linux/netfilter/x_tables.h> 7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack.h> 8#include <net/netfilter/nf_conntrack.h>
9 9
10MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
10MODULE_LICENSE("GPL"); 11MODULE_LICENSE("GPL");
11MODULE_ALIAS("ipt_NOTRACK"); 12MODULE_ALIAS("ipt_NOTRACK");
12MODULE_ALIAS("ip6t_NOTRACK"); 13MODULE_ALIAS("ip6t_NOTRACK");
13 14
14static unsigned int 15static unsigned int
15target(struct sk_buff *skb, 16notrack_tg(struct sk_buff *skb, const struct net_device *in,
16 const struct net_device *in, 17 const struct net_device *out, unsigned int hooknum,
17 const struct net_device *out, 18 const struct xt_target *target, const void *targinfo)
18 unsigned int hooknum,
19 const struct xt_target *target,
20 const void *targinfo)
21{ 19{
22 /* Previously seen (loopback)? Ignore. */ 20 /* Previously seen (loopback)? Ignore. */
23 if (skb->nfct != NULL) 21 if (skb->nfct != NULL)
@@ -34,33 +32,32 @@ target(struct sk_buff *skb,
34 return XT_CONTINUE; 32 return XT_CONTINUE;
35} 33}
36 34
37static struct xt_target xt_notrack_target[] __read_mostly = { 35static struct xt_target notrack_tg_reg[] __read_mostly = {
38 { 36 {
39 .name = "NOTRACK", 37 .name = "NOTRACK",
40 .family = AF_INET, 38 .family = AF_INET,
41 .target = target, 39 .target = notrack_tg,
42 .table = "raw", 40 .table = "raw",
43 .me = THIS_MODULE, 41 .me = THIS_MODULE,
44 }, 42 },
45 { 43 {
46 .name = "NOTRACK", 44 .name = "NOTRACK",
47 .family = AF_INET6, 45 .family = AF_INET6,
48 .target = target, 46 .target = notrack_tg,
49 .table = "raw", 47 .table = "raw",
50 .me = THIS_MODULE, 48 .me = THIS_MODULE,
51 }, 49 },
52}; 50};
53 51
54static int __init xt_notrack_init(void) 52static int __init notrack_tg_init(void)
55{ 53{
56 return xt_register_targets(xt_notrack_target, 54 return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
57 ARRAY_SIZE(xt_notrack_target));
58} 55}
59 56
60static void __exit xt_notrack_fini(void) 57static void __exit notrack_tg_exit(void)
61{ 58{
62 xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); 59 xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
63} 60}
64 61
65module_init(xt_notrack_init); 62module_init(notrack_tg_init);
66module_exit(xt_notrack_fini); 63module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
new file mode 100644
index 000000000000..24c73ba31eaa
--- /dev/null
+++ b/net/netfilter/xt_RATEEST.c
@@ -0,0 +1,205 @@
1/*
2 * (C) 2007 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/gen_stats.h>
11#include <linux/jhash.h>
12#include <linux/rtnetlink.h>
13#include <linux/random.h>
14#include <net/gen_stats.h>
15#include <net/netlink.h>
16
17#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter/xt_RATEEST.h>
19#include <net/netfilter/xt_rateest.h>
20
21static DEFINE_MUTEX(xt_rateest_mutex);
22
23#define RATEEST_HSIZE 16
24static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
25static unsigned int jhash_rnd __read_mostly;
26
27static unsigned int xt_rateest_hash(const char *name)
28{
29 return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
30 (RATEEST_HSIZE - 1);
31}
32
33static void xt_rateest_hash_insert(struct xt_rateest *est)
34{
35 unsigned int h;
36
37 h = xt_rateest_hash(est->name);
38 hlist_add_head(&est->list, &rateest_hash[h]);
39}
40
41struct xt_rateest *xt_rateest_lookup(const char *name)
42{
43 struct xt_rateest *est;
44 struct hlist_node *n;
45 unsigned int h;
46
47 h = xt_rateest_hash(name);
48 mutex_lock(&xt_rateest_mutex);
49 hlist_for_each_entry(est, n, &rateest_hash[h], list) {
50 if (strcmp(est->name, name) == 0) {
51 est->refcnt++;
52 mutex_unlock(&xt_rateest_mutex);
53 return est;
54 }
55 }
56 mutex_unlock(&xt_rateest_mutex);
57 return NULL;
58}
59EXPORT_SYMBOL_GPL(xt_rateest_lookup);
60
61void xt_rateest_put(struct xt_rateest *est)
62{
63 mutex_lock(&xt_rateest_mutex);
64 if (--est->refcnt == 0) {
65 hlist_del(&est->list);
66 gen_kill_estimator(&est->bstats, &est->rstats);
67 kfree(est);
68 }
69 mutex_unlock(&xt_rateest_mutex);
70}
71EXPORT_SYMBOL_GPL(xt_rateest_put);
72
73static unsigned int
74xt_rateest_tg(struct sk_buff *skb,
75 const struct net_device *in,
76 const struct net_device *out,
77 unsigned int hooknum,
78 const struct xt_target *target,
79 const void *targinfo)
80{
81 const struct xt_rateest_target_info *info = targinfo;
82 struct gnet_stats_basic *stats = &info->est->bstats;
83
84 spin_lock_bh(&info->est->lock);
85 stats->bytes += skb->len;
86 stats->packets++;
87 spin_unlock_bh(&info->est->lock);
88
89 return XT_CONTINUE;
90}
91
92static bool
93xt_rateest_tg_checkentry(const char *tablename,
94 const void *entry,
95 const struct xt_target *target,
96 void *targinfo,
97 unsigned int hook_mask)
98{
99 struct xt_rateest_target_info *info = (void *)targinfo;
100 struct xt_rateest *est;
101 struct {
102 struct nlattr opt;
103 struct gnet_estimator est;
104 } cfg;
105
106 est = xt_rateest_lookup(info->name);
107 if (est) {
108 /*
109 * If estimator parameters are specified, they must match the
110 * existing estimator.
111 */
112 if ((!info->interval && !info->ewma_log) ||
113 (info->interval != est->params.interval ||
114 info->ewma_log != est->params.ewma_log)) {
115 xt_rateest_put(est);
116 return false;
117 }
118 info->est = est;
119 return true;
120 }
121
122 est = kzalloc(sizeof(*est), GFP_KERNEL);
123 if (!est)
124 goto err1;
125
126 strlcpy(est->name, info->name, sizeof(est->name));
127 spin_lock_init(&est->lock);
128 est->refcnt = 1;
129 est->params.interval = info->interval;
130 est->params.ewma_log = info->ewma_log;
131
132 cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est));
133 cfg.opt.nla_type = TCA_STATS_RATE_EST;
134 cfg.est.interval = info->interval;
135 cfg.est.ewma_log = info->ewma_log;
136
137 if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock,
138 &cfg.opt) < 0)
139 goto err2;
140
141 info->est = est;
142 xt_rateest_hash_insert(est);
143
144 return true;
145
146err2:
147 kfree(est);
148err1:
149 return false;
150}
151
152static void xt_rateest_tg_destroy(const struct xt_target *target,
153 void *targinfo)
154{
155 struct xt_rateest_target_info *info = targinfo;
156
157 xt_rateest_put(info->est);
158}
159
160static struct xt_target xt_rateest_target[] __read_mostly = {
161 {
162 .family = AF_INET,
163 .name = "RATEEST",
164 .target = xt_rateest_tg,
165 .checkentry = xt_rateest_tg_checkentry,
166 .destroy = xt_rateest_tg_destroy,
167 .targetsize = sizeof(struct xt_rateest_target_info),
168 .me = THIS_MODULE,
169 },
170 {
171 .family = AF_INET6,
172 .name = "RATEEST",
173 .target = xt_rateest_tg,
174 .checkentry = xt_rateest_tg_checkentry,
175 .destroy = xt_rateest_tg_destroy,
176 .targetsize = sizeof(struct xt_rateest_target_info),
177 .me = THIS_MODULE,
178 },
179};
180
181static int __init xt_rateest_tg_init(void)
182{
183 unsigned int i;
184
185 for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
186 INIT_HLIST_HEAD(&rateest_hash[i]);
187
188 get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
189 return xt_register_targets(xt_rateest_target,
190 ARRAY_SIZE(xt_rateest_target));
191}
192
193static void __exit xt_rateest_tg_fini(void)
194{
195 xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target));
196}
197
198
199MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
200MODULE_LICENSE("GPL");
201MODULE_DESCRIPTION("Xtables: packet rate estimator");
202MODULE_ALIAS("ipt_RATEEST");
203MODULE_ALIAS("ip6t_RATEEST");
204module_init(xt_rateest_tg_init);
205module_exit(xt_rateest_tg_fini);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 235806eb6ecd..7708e2084ce2 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -20,7 +20,7 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); 22MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
23MODULE_DESCRIPTION("ip[6]tables SECMARK modification module"); 23MODULE_DESCRIPTION("Xtables: packet security mark modification");
24MODULE_ALIAS("ipt_SECMARK"); 24MODULE_ALIAS("ipt_SECMARK");
25MODULE_ALIAS("ip6t_SECMARK"); 25MODULE_ALIAS("ip6t_SECMARK");
26 26
@@ -28,10 +28,10 @@ MODULE_ALIAS("ip6t_SECMARK");
28 28
29static u8 mode; 29static u8 mode;
30 30
31static unsigned int target(struct sk_buff *skb, const struct net_device *in, 31static unsigned int
32 const struct net_device *out, unsigned int hooknum, 32secmark_tg(struct sk_buff *skb, const struct net_device *in,
33 const struct xt_target *target, 33 const struct net_device *out, unsigned int hooknum,
34 const void *targinfo) 34 const struct xt_target *target, const void *targinfo)
35{ 35{
36 u32 secmark = 0; 36 u32 secmark = 0;
37 const struct xt_secmark_target_info *info = targinfo; 37 const struct xt_secmark_target_info *info = targinfo;
@@ -72,18 +72,20 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
72 return false; 72 return false;
73 } 73 }
74 74
75 err = selinux_relabel_packet_permission(sel->selsid); 75 err = selinux_secmark_relabel_packet_permission(sel->selsid);
76 if (err) { 76 if (err) {
77 printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); 77 printk(KERN_INFO PFX "unable to obtain relabeling permission\n");
78 return false; 78 return false;
79 } 79 }
80 80
81 selinux_secmark_refcount_inc();
81 return true; 82 return true;
82} 83}
83 84
84static bool checkentry(const char *tablename, const void *entry, 85static bool
85 const struct xt_target *target, void *targinfo, 86secmark_tg_check(const char *tablename, const void *entry,
86 unsigned int hook_mask) 87 const struct xt_target *target, void *targinfo,
88 unsigned int hook_mask)
87{ 89{
88 struct xt_secmark_target_info *info = targinfo; 90 struct xt_secmark_target_info *info = targinfo;
89 91
@@ -109,12 +111,21 @@ static bool checkentry(const char *tablename, const void *entry,
109 return true; 111 return true;
110} 112}
111 113
112static struct xt_target xt_secmark_target[] __read_mostly = { 114void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
115{
116 switch (mode) {
117 case SECMARK_MODE_SEL:
118 selinux_secmark_refcount_dec();
119 }
120}
121
122static struct xt_target secmark_tg_reg[] __read_mostly = {
113 { 123 {
114 .name = "SECMARK", 124 .name = "SECMARK",
115 .family = AF_INET, 125 .family = AF_INET,
116 .checkentry = checkentry, 126 .checkentry = secmark_tg_check,
117 .target = target, 127 .destroy = secmark_tg_destroy,
128 .target = secmark_tg,
118 .targetsize = sizeof(struct xt_secmark_target_info), 129 .targetsize = sizeof(struct xt_secmark_target_info),
119 .table = "mangle", 130 .table = "mangle",
120 .me = THIS_MODULE, 131 .me = THIS_MODULE,
@@ -122,24 +133,24 @@ static struct xt_target xt_secmark_target[] __read_mostly = {
122 { 133 {
123 .name = "SECMARK", 134 .name = "SECMARK",
124 .family = AF_INET6, 135 .family = AF_INET6,
125 .checkentry = checkentry, 136 .checkentry = secmark_tg_check,
126 .target = target, 137 .destroy = secmark_tg_destroy,
138 .target = secmark_tg,
127 .targetsize = sizeof(struct xt_secmark_target_info), 139 .targetsize = sizeof(struct xt_secmark_target_info),
128 .table = "mangle", 140 .table = "mangle",
129 .me = THIS_MODULE, 141 .me = THIS_MODULE,
130 }, 142 },
131}; 143};
132 144
133static int __init xt_secmark_init(void) 145static int __init secmark_tg_init(void)
134{ 146{
135 return xt_register_targets(xt_secmark_target, 147 return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
136 ARRAY_SIZE(xt_secmark_target));
137} 148}
138 149
139static void __exit xt_secmark_fini(void) 150static void __exit secmark_tg_exit(void)
140{ 151{
141 xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); 152 xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
142} 153}
143 154
144module_init(xt_secmark_init); 155module_init(secmark_tg_init);
145module_exit(xt_secmark_fini); 156module_exit(secmark_tg_exit);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 07435a602b11..217e2b686322 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -13,7 +13,10 @@
13#include <linux/ip.h> 13#include <linux/ip.h>
14#include <linux/ipv6.h> 14#include <linux/ipv6.h>
15#include <linux/tcp.h> 15#include <linux/tcp.h>
16#include <net/dst.h>
17#include <net/flow.h>
16#include <net/ipv6.h> 18#include <net/ipv6.h>
19#include <net/route.h>
17#include <net/tcp.h> 20#include <net/tcp.h>
18 21
19#include <linux/netfilter_ipv4/ip_tables.h> 22#include <linux/netfilter_ipv4/ip_tables.h>
@@ -24,7 +27,7 @@
24 27
25MODULE_LICENSE("GPL"); 28MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 29MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
27MODULE_DESCRIPTION("x_tables TCP MSS modification module"); 30MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
28MODULE_ALIAS("ipt_TCPMSS"); 31MODULE_ALIAS("ipt_TCPMSS");
29MODULE_ALIAS("ip6t_TCPMSS"); 32MODULE_ALIAS("ip6t_TCPMSS");
30 33
@@ -41,6 +44,7 @@ optlen(const u_int8_t *opt, unsigned int offset)
41static int 44static int
42tcpmss_mangle_packet(struct sk_buff *skb, 45tcpmss_mangle_packet(struct sk_buff *skb,
43 const struct xt_tcpmss_info *info, 46 const struct xt_tcpmss_info *info,
47 unsigned int in_mtu,
44 unsigned int tcphoff, 48 unsigned int tcphoff,
45 unsigned int minlen) 49 unsigned int minlen)
46{ 50{
@@ -76,7 +80,13 @@ tcpmss_mangle_packet(struct sk_buff *skb,
76 dst_mtu(skb->dst)); 80 dst_mtu(skb->dst));
77 return -1; 81 return -1;
78 } 82 }
79 newmss = dst_mtu(skb->dst) - minlen; 83 if (in_mtu <= minlen) {
84 if (net_ratelimit())
85 printk(KERN_ERR "xt_TCPMSS: unknown or "
86 "invalid path-MTU (%u)\n", in_mtu);
87 return -1;
88 }
89 newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
80 } else 90 } else
81 newmss = info->mss; 91 newmss = info->mss;
82 92
@@ -88,15 +98,19 @@ tcpmss_mangle_packet(struct sk_buff *skb,
88 98
89 oldmss = (opt[i+2] << 8) | opt[i+3]; 99 oldmss = (opt[i+2] << 8) | opt[i+3];
90 100
91 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 101 /* Never increase MSS, even when setting it, as
92 oldmss <= newmss) 102 * doing so results in problems for hosts that rely
103 * on MSS being set correctly.
104 */
105 if (oldmss <= newmss)
93 return 0; 106 return 0;
94 107
95 opt[i+2] = (newmss & 0xff00) >> 8; 108 opt[i+2] = (newmss & 0xff00) >> 8;
96 opt[i+3] = newmss & 0x00ff; 109 opt[i+3] = newmss & 0x00ff;
97 110
98 nf_proto_csum_replace2(&tcph->check, skb, 111 inet_proto_csum_replace2(&tcph->check, skb,
99 htons(oldmss), htons(newmss), 0); 112 htons(oldmss), htons(newmss),
113 0);
100 return 0; 114 return 0;
101 } 115 }
102 } 116 }
@@ -117,55 +131,94 @@ tcpmss_mangle_packet(struct sk_buff *skb,
117 opt = (u_int8_t *)tcph + sizeof(struct tcphdr); 131 opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
118 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); 132 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
119 133
120 nf_proto_csum_replace2(&tcph->check, skb, 134 inet_proto_csum_replace2(&tcph->check, skb,
121 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); 135 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
122 opt[0] = TCPOPT_MSS; 136 opt[0] = TCPOPT_MSS;
123 opt[1] = TCPOLEN_MSS; 137 opt[1] = TCPOLEN_MSS;
124 opt[2] = (newmss & 0xff00) >> 8; 138 opt[2] = (newmss & 0xff00) >> 8;
125 opt[3] = newmss & 0x00ff; 139 opt[3] = newmss & 0x00ff;
126 140
127 nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); 141 inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
128 142
129 oldval = ((__be16 *)tcph)[6]; 143 oldval = ((__be16 *)tcph)[6];
130 tcph->doff += TCPOLEN_MSS/4; 144 tcph->doff += TCPOLEN_MSS/4;
131 nf_proto_csum_replace2(&tcph->check, skb, 145 inet_proto_csum_replace2(&tcph->check, skb,
132 oldval, ((__be16 *)tcph)[6], 0); 146 oldval, ((__be16 *)tcph)[6], 0);
133 return TCPOLEN_MSS; 147 return TCPOLEN_MSS;
134} 148}
135 149
150static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
151{
152 struct flowi fl = {
153 .fl4_dst = iph->saddr,
154 };
155 const struct nf_afinfo *ai;
156 struct rtable *rt = NULL;
157 u_int32_t mtu = ~0U;
158
159 rcu_read_lock();
160 ai = nf_get_afinfo(AF_INET);
161 if (ai != NULL)
162 ai->route((struct dst_entry **)&rt, &fl);
163 rcu_read_unlock();
164
165 if (rt != NULL) {
166 mtu = dst_mtu(&rt->u.dst);
167 dst_release(&rt->u.dst);
168 }
169 return mtu;
170}
171
136static unsigned int 172static unsigned int
137xt_tcpmss_target4(struct sk_buff *skb, 173tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
138 const struct net_device *in, 174 const struct net_device *out, unsigned int hooknum,
139 const struct net_device *out, 175 const struct xt_target *target, const void *targinfo)
140 unsigned int hooknum,
141 const struct xt_target *target,
142 const void *targinfo)
143{ 176{
144 struct iphdr *iph = ip_hdr(skb); 177 struct iphdr *iph = ip_hdr(skb);
145 __be16 newlen; 178 __be16 newlen;
146 int ret; 179 int ret;
147 180
148 ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4, 181 ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph),
182 iph->ihl * 4,
149 sizeof(*iph) + sizeof(struct tcphdr)); 183 sizeof(*iph) + sizeof(struct tcphdr));
150 if (ret < 0) 184 if (ret < 0)
151 return NF_DROP; 185 return NF_DROP;
152 if (ret > 0) { 186 if (ret > 0) {
153 iph = ip_hdr(skb); 187 iph = ip_hdr(skb);
154 newlen = htons(ntohs(iph->tot_len) + ret); 188 newlen = htons(ntohs(iph->tot_len) + ret);
155 nf_csum_replace2(&iph->check, iph->tot_len, newlen); 189 csum_replace2(&iph->check, iph->tot_len, newlen);
156 iph->tot_len = newlen; 190 iph->tot_len = newlen;
157 } 191 }
158 return XT_CONTINUE; 192 return XT_CONTINUE;
159} 193}
160 194
161#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 195#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
196static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph)
197{
198 struct flowi fl = {
199 .fl6_dst = iph->saddr,
200 };
201 const struct nf_afinfo *ai;
202 struct rtable *rt = NULL;
203 u_int32_t mtu = ~0U;
204
205 rcu_read_lock();
206 ai = nf_get_afinfo(AF_INET6);
207 if (ai != NULL)
208 ai->route((struct dst_entry **)&rt, &fl);
209 rcu_read_unlock();
210
211 if (rt != NULL) {
212 mtu = dst_mtu(&rt->u.dst);
213 dst_release(&rt->u.dst);
214 }
215 return mtu;
216}
217
162static unsigned int 218static unsigned int
163xt_tcpmss_target6(struct sk_buff *skb, 219tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
164 const struct net_device *in, 220 const struct net_device *out, unsigned int hooknum,
165 const struct net_device *out, 221 const struct xt_target *target, const void *targinfo)
166 unsigned int hooknum,
167 const struct xt_target *target,
168 const void *targinfo)
169{ 222{
170 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 223 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
171 u8 nexthdr; 224 u8 nexthdr;
@@ -174,11 +227,10 @@ xt_tcpmss_target6(struct sk_buff *skb,
174 227
175 nexthdr = ipv6h->nexthdr; 228 nexthdr = ipv6h->nexthdr;
176 tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); 229 tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
177 if (tcphoff < 0) { 230 if (tcphoff < 0)
178 WARN_ON(1);
179 return NF_DROP; 231 return NF_DROP;
180 } 232 ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h),
181 ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, 233 tcphoff,
182 sizeof(*ipv6h) + sizeof(struct tcphdr)); 234 sizeof(*ipv6h) + sizeof(struct tcphdr));
183 if (ret < 0) 235 if (ret < 0)
184 return NF_DROP; 236 return NF_DROP;
@@ -206,19 +258,17 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
206} 258}
207 259
208static bool 260static bool
209xt_tcpmss_checkentry4(const char *tablename, 261tcpmss_tg4_check(const char *tablename, const void *entry,
210 const void *entry, 262 const struct xt_target *target, void *targinfo,
211 const struct xt_target *target, 263 unsigned int hook_mask)
212 void *targinfo,
213 unsigned int hook_mask)
214{ 264{
215 const struct xt_tcpmss_info *info = targinfo; 265 const struct xt_tcpmss_info *info = targinfo;
216 const struct ipt_entry *e = entry; 266 const struct ipt_entry *e = entry;
217 267
218 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 268 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
219 (hook_mask & ~((1 << NF_IP_FORWARD) | 269 (hook_mask & ~((1 << NF_INET_FORWARD) |
220 (1 << NF_IP_LOCAL_OUT) | 270 (1 << NF_INET_LOCAL_OUT) |
221 (1 << NF_IP_POST_ROUTING))) != 0) { 271 (1 << NF_INET_POST_ROUTING))) != 0) {
222 printk("xt_TCPMSS: path-MTU clamping only supported in " 272 printk("xt_TCPMSS: path-MTU clamping only supported in "
223 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 273 "FORWARD, OUTPUT and POSTROUTING hooks\n");
224 return false; 274 return false;
@@ -231,19 +281,17 @@ xt_tcpmss_checkentry4(const char *tablename,
231 281
232#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 282#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
233static bool 283static bool
234xt_tcpmss_checkentry6(const char *tablename, 284tcpmss_tg6_check(const char *tablename, const void *entry,
235 const void *entry, 285 const struct xt_target *target, void *targinfo,
236 const struct xt_target *target, 286 unsigned int hook_mask)
237 void *targinfo,
238 unsigned int hook_mask)
239{ 287{
240 const struct xt_tcpmss_info *info = targinfo; 288 const struct xt_tcpmss_info *info = targinfo;
241 const struct ip6t_entry *e = entry; 289 const struct ip6t_entry *e = entry;
242 290
243 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 291 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
244 (hook_mask & ~((1 << NF_IP6_FORWARD) | 292 (hook_mask & ~((1 << NF_INET_FORWARD) |
245 (1 << NF_IP6_LOCAL_OUT) | 293 (1 << NF_INET_LOCAL_OUT) |
246 (1 << NF_IP6_POST_ROUTING))) != 0) { 294 (1 << NF_INET_POST_ROUTING))) != 0) {
247 printk("xt_TCPMSS: path-MTU clamping only supported in " 295 printk("xt_TCPMSS: path-MTU clamping only supported in "
248 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 296 "FORWARD, OUTPUT and POSTROUTING hooks\n");
249 return false; 297 return false;
@@ -255,12 +303,12 @@ xt_tcpmss_checkentry6(const char *tablename,
255} 303}
256#endif 304#endif
257 305
258static struct xt_target xt_tcpmss_reg[] __read_mostly = { 306static struct xt_target tcpmss_tg_reg[] __read_mostly = {
259 { 307 {
260 .family = AF_INET, 308 .family = AF_INET,
261 .name = "TCPMSS", 309 .name = "TCPMSS",
262 .checkentry = xt_tcpmss_checkentry4, 310 .checkentry = tcpmss_tg4_check,
263 .target = xt_tcpmss_target4, 311 .target = tcpmss_tg4,
264 .targetsize = sizeof(struct xt_tcpmss_info), 312 .targetsize = sizeof(struct xt_tcpmss_info),
265 .proto = IPPROTO_TCP, 313 .proto = IPPROTO_TCP,
266 .me = THIS_MODULE, 314 .me = THIS_MODULE,
@@ -269,8 +317,8 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
269 { 317 {
270 .family = AF_INET6, 318 .family = AF_INET6,
271 .name = "TCPMSS", 319 .name = "TCPMSS",
272 .checkentry = xt_tcpmss_checkentry6, 320 .checkentry = tcpmss_tg6_check,
273 .target = xt_tcpmss_target6, 321 .target = tcpmss_tg6,
274 .targetsize = sizeof(struct xt_tcpmss_info), 322 .targetsize = sizeof(struct xt_tcpmss_info),
275 .proto = IPPROTO_TCP, 323 .proto = IPPROTO_TCP,
276 .me = THIS_MODULE, 324 .me = THIS_MODULE,
@@ -278,15 +326,15 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
278#endif 326#endif
279}; 327};
280 328
281static int __init xt_tcpmss_init(void) 329static int __init tcpmss_tg_init(void)
282{ 330{
283 return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); 331 return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
284} 332}
285 333
286static void __exit xt_tcpmss_fini(void) 334static void __exit tcpmss_tg_exit(void)
287{ 335{
288 xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); 336 xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
289} 337}
290 338
291module_init(xt_tcpmss_init); 339module_init(tcpmss_tg_init);
292module_exit(xt_tcpmss_fini); 340module_exit(tcpmss_tg_exit);
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
new file mode 100644
index 000000000000..3b2aa56833b9
--- /dev/null
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -0,0 +1,147 @@
1/*
2 * A module for stripping a specific TCP option from TCP packets.
3 *
4 * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
5 * Copyright © CC Computer Consultants GmbH, 2007
6 * Contact: Jan Engelhardt <jengelh@computergmbh.de>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <linux/ipv6.h>
17#include <linux/tcp.h>
18#include <net/ipv6.h>
19#include <net/tcp.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_TCPOPTSTRIP.h>
22
23static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
24{
25 /* Beware zero-length options: make finite progress */
26 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
27 return 1;
28 else
29 return opt[offset+1];
30}
31
32static unsigned int
33tcpoptstrip_mangle_packet(struct sk_buff *skb,
34 const struct xt_tcpoptstrip_target_info *info,
35 unsigned int tcphoff, unsigned int minlen)
36{
37 unsigned int optl, i, j;
38 struct tcphdr *tcph;
39 u_int16_t n, o;
40 u_int8_t *opt;
41
42 if (!skb_make_writable(skb, skb->len))
43 return NF_DROP;
44
45 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
46 opt = (u_int8_t *)tcph;
47
48 /*
49 * Walk through all TCP options - if we find some option to remove,
50 * set all octets to %TCPOPT_NOP and adjust checksum.
51 */
52 for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
53 optl = optlen(opt, i);
54
55 if (i + optl > tcp_hdrlen(skb))
56 break;
57
58 if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
59 continue;
60
61 for (j = 0; j < optl; ++j) {
62 o = opt[i+j];
63 n = TCPOPT_NOP;
64 if ((i + j) % 2 == 0) {
65 o <<= 8;
66 n <<= 8;
67 }
68 inet_proto_csum_replace2(&tcph->check, skb, htons(o),
69 htons(n), 0);
70 }
71 memset(opt + i, TCPOPT_NOP, optl);
72 }
73
74 return XT_CONTINUE;
75}
76
77static unsigned int
78tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in,
79 const struct net_device *out, unsigned int hooknum,
80 const struct xt_target *target, const void *targinfo)
81{
82 return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb),
83 sizeof(struct iphdr) + sizeof(struct tcphdr));
84}
85
86#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
87static unsigned int
88tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
89 const struct net_device *out, unsigned int hooknum,
90 const struct xt_target *target, const void *targinfo)
91{
92 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
93 unsigned int tcphoff;
94 u_int8_t nexthdr;
95
96 nexthdr = ipv6h->nexthdr;
97 tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
98 if (tcphoff < 0)
99 return NF_DROP;
100
101 return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff,
102 sizeof(*ipv6h) + sizeof(struct tcphdr));
103}
104#endif
105
106static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
107 {
108 .name = "TCPOPTSTRIP",
109 .family = AF_INET,
110 .table = "mangle",
111 .proto = IPPROTO_TCP,
112 .target = tcpoptstrip_tg4,
113 .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
114 .me = THIS_MODULE,
115 },
116#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
117 {
118 .name = "TCPOPTSTRIP",
119 .family = AF_INET6,
120 .table = "mangle",
121 .proto = IPPROTO_TCP,
122 .target = tcpoptstrip_tg6,
123 .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
124 .me = THIS_MODULE,
125 },
126#endif
127};
128
129static int __init tcpoptstrip_tg_init(void)
130{
131 return xt_register_targets(tcpoptstrip_tg_reg,
132 ARRAY_SIZE(tcpoptstrip_tg_reg));
133}
134
135static void __exit tcpoptstrip_tg_exit(void)
136{
137 xt_unregister_targets(tcpoptstrip_tg_reg,
138 ARRAY_SIZE(tcpoptstrip_tg_reg));
139}
140
141module_init(tcpoptstrip_tg_init);
142module_exit(tcpoptstrip_tg_exit);
143MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
144MODULE_DESCRIPTION("Xtables: TCP option stripping");
145MODULE_LICENSE("GPL");
146MODULE_ALIAS("ipt_TCPOPTSTRIP");
147MODULE_ALIAS("ip6t_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 26c5d08ab2c2..30dab79a3438 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -5,49 +5,46 @@
5 5
6#include <linux/netfilter/x_tables.h> 6#include <linux/netfilter/x_tables.h>
7 7
8MODULE_DESCRIPTION("Xtables: packet flow tracing");
8MODULE_LICENSE("GPL"); 9MODULE_LICENSE("GPL");
9MODULE_ALIAS("ipt_TRACE"); 10MODULE_ALIAS("ipt_TRACE");
10MODULE_ALIAS("ip6t_TRACE"); 11MODULE_ALIAS("ip6t_TRACE");
11 12
12static unsigned int 13static unsigned int
13target(struct sk_buff *skb, 14trace_tg(struct sk_buff *skb, const struct net_device *in,
14 const struct net_device *in, 15 const struct net_device *out, unsigned int hooknum,
15 const struct net_device *out, 16 const struct xt_target *target, const void *targinfo)
16 unsigned int hooknum,
17 const struct xt_target *target,
18 const void *targinfo)
19{ 17{
20 skb->nf_trace = 1; 18 skb->nf_trace = 1;
21 return XT_CONTINUE; 19 return XT_CONTINUE;
22} 20}
23 21
24static struct xt_target xt_trace_target[] __read_mostly = { 22static struct xt_target trace_tg_reg[] __read_mostly = {
25 { 23 {
26 .name = "TRACE", 24 .name = "TRACE",
27 .family = AF_INET, 25 .family = AF_INET,
28 .target = target, 26 .target = trace_tg,
29 .table = "raw", 27 .table = "raw",
30 .me = THIS_MODULE, 28 .me = THIS_MODULE,
31 }, 29 },
32 { 30 {
33 .name = "TRACE", 31 .name = "TRACE",
34 .family = AF_INET6, 32 .family = AF_INET6,
35 .target = target, 33 .target = trace_tg,
36 .table = "raw", 34 .table = "raw",
37 .me = THIS_MODULE, 35 .me = THIS_MODULE,
38 }, 36 },
39}; 37};
40 38
41static int __init xt_trace_init(void) 39static int __init trace_tg_init(void)
42{ 40{
43 return xt_register_targets(xt_trace_target, 41 return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
44 ARRAY_SIZE(xt_trace_target));
45} 42}
46 43
47static void __exit xt_trace_fini(void) 44static void __exit trace_tg_exit(void)
48{ 45{
49 xt_unregister_targets(xt_trace_target, ARRAY_SIZE(xt_trace_target)); 46 xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
50} 47}
51 48
52module_init(xt_trace_init); 49module_init(trace_tg_init);
53module_exit(xt_trace_fini); 50module_exit(trace_tg_exit);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 64bcdb0fe1e6..89f47364e848 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -10,52 +10,47 @@
10#include <linux/netfilter/xt_comment.h> 10#include <linux/netfilter/xt_comment.h>
11 11
12MODULE_AUTHOR("Brad Fisher <brad@info-link.net>"); 12MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
13MODULE_DESCRIPTION("iptables comment match module"); 13MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a comment");
14MODULE_LICENSE("GPL"); 14MODULE_LICENSE("GPL");
15MODULE_ALIAS("ipt_comment"); 15MODULE_ALIAS("ipt_comment");
16MODULE_ALIAS("ip6t_comment"); 16MODULE_ALIAS("ip6t_comment");
17 17
18static bool 18static bool
19match(const struct sk_buff *skb, 19comment_mt(const struct sk_buff *skb, const struct net_device *in,
20 const struct net_device *in, 20 const struct net_device *out, const struct xt_match *match,
21 const struct net_device *out, 21 const void *matchinfo, int offset, unsigned int protooff,
22 const struct xt_match *match, 22 bool *hotdrop)
23 const void *matchinfo,
24 int offset,
25 unsigned int protooff,
26 bool *hotdrop)
27{ 23{
28 /* We always match */ 24 /* We always match */
29 return true; 25 return true;
30} 26}
31 27
32static struct xt_match xt_comment_match[] __read_mostly = { 28static struct xt_match comment_mt_reg[] __read_mostly = {
33 { 29 {
34 .name = "comment", 30 .name = "comment",
35 .family = AF_INET, 31 .family = AF_INET,
36 .match = match, 32 .match = comment_mt,
37 .matchsize = sizeof(struct xt_comment_info), 33 .matchsize = sizeof(struct xt_comment_info),
38 .me = THIS_MODULE 34 .me = THIS_MODULE
39 }, 35 },
40 { 36 {
41 .name = "comment", 37 .name = "comment",
42 .family = AF_INET6, 38 .family = AF_INET6,
43 .match = match, 39 .match = comment_mt,
44 .matchsize = sizeof(struct xt_comment_info), 40 .matchsize = sizeof(struct xt_comment_info),
45 .me = THIS_MODULE 41 .me = THIS_MODULE
46 }, 42 },
47}; 43};
48 44
49static int __init xt_comment_init(void) 45static int __init comment_mt_init(void)
50{ 46{
51 return xt_register_matches(xt_comment_match, 47 return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
52 ARRAY_SIZE(xt_comment_match));
53} 48}
54 49
55static void __exit xt_comment_fini(void) 50static void __exit comment_mt_exit(void)
56{ 51{
57 xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); 52 xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
58} 53}
59 54
60module_init(xt_comment_init); 55module_init(comment_mt_init);
61module_exit(xt_comment_fini); 56module_exit(comment_mt_exit);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 9ec50139b9a1..b15e7e2fa143 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -12,19 +12,15 @@
12 12
13MODULE_LICENSE("GPL"); 13MODULE_LICENSE("GPL");
14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
15MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); 15MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
16MODULE_ALIAS("ipt_connbytes"); 16MODULE_ALIAS("ipt_connbytes");
17MODULE_ALIAS("ip6t_connbytes"); 17MODULE_ALIAS("ip6t_connbytes");
18 18
19static bool 19static bool
20match(const struct sk_buff *skb, 20connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
21 const struct net_device *in, 21 const struct net_device *out, const struct xt_match *match,
22 const struct net_device *out, 22 const void *matchinfo, int offset, unsigned int protoff,
23 const struct xt_match *match, 23 bool *hotdrop)
24 const void *matchinfo,
25 int offset,
26 unsigned int protoff,
27 bool *hotdrop)
28{ 24{
29 const struct xt_connbytes_info *sinfo = matchinfo; 25 const struct xt_connbytes_info *sinfo = matchinfo;
30 const struct nf_conn *ct; 26 const struct nf_conn *ct;
@@ -96,11 +92,10 @@ match(const struct sk_buff *skb,
96 return what >= sinfo->count.from; 92 return what >= sinfo->count.from;
97} 93}
98 94
99static bool check(const char *tablename, 95static bool
100 const void *ip, 96connbytes_mt_check(const char *tablename, const void *ip,
101 const struct xt_match *match, 97 const struct xt_match *match, void *matchinfo,
102 void *matchinfo, 98 unsigned int hook_mask)
103 unsigned int hook_mask)
104{ 99{
105 const struct xt_connbytes_info *sinfo = matchinfo; 100 const struct xt_connbytes_info *sinfo = matchinfo;
106 101
@@ -116,7 +111,7 @@ static bool check(const char *tablename,
116 111
117 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 112 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
118 printk(KERN_WARNING "can't load conntrack support for " 113 printk(KERN_WARNING "can't load conntrack support for "
119 "proto=%d\n", match->family); 114 "proto=%u\n", match->family);
120 return false; 115 return false;
121 } 116 }
122 117
@@ -124,43 +119,42 @@ static bool check(const char *tablename,
124} 119}
125 120
126static void 121static void
127destroy(const struct xt_match *match, void *matchinfo) 122connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
128{ 123{
129 nf_ct_l3proto_module_put(match->family); 124 nf_ct_l3proto_module_put(match->family);
130} 125}
131 126
132static struct xt_match xt_connbytes_match[] __read_mostly = { 127static struct xt_match connbytes_mt_reg[] __read_mostly = {
133 { 128 {
134 .name = "connbytes", 129 .name = "connbytes",
135 .family = AF_INET, 130 .family = AF_INET,
136 .checkentry = check, 131 .checkentry = connbytes_mt_check,
137 .match = match, 132 .match = connbytes_mt,
138 .destroy = destroy, 133 .destroy = connbytes_mt_destroy,
139 .matchsize = sizeof(struct xt_connbytes_info), 134 .matchsize = sizeof(struct xt_connbytes_info),
140 .me = THIS_MODULE 135 .me = THIS_MODULE
141 }, 136 },
142 { 137 {
143 .name = "connbytes", 138 .name = "connbytes",
144 .family = AF_INET6, 139 .family = AF_INET6,
145 .checkentry = check, 140 .checkentry = connbytes_mt_check,
146 .match = match, 141 .match = connbytes_mt,
147 .destroy = destroy, 142 .destroy = connbytes_mt_destroy,
148 .matchsize = sizeof(struct xt_connbytes_info), 143 .matchsize = sizeof(struct xt_connbytes_info),
149 .me = THIS_MODULE 144 .me = THIS_MODULE
150 }, 145 },
151}; 146};
152 147
153static int __init xt_connbytes_init(void) 148static int __init connbytes_mt_init(void)
154{ 149{
155 return xt_register_matches(xt_connbytes_match, 150 return xt_register_matches(connbytes_mt_reg,
156 ARRAY_SIZE(xt_connbytes_match)); 151 ARRAY_SIZE(connbytes_mt_reg));
157} 152}
158 153
159static void __exit xt_connbytes_fini(void) 154static void __exit connbytes_mt_exit(void)
160{ 155{
161 xt_unregister_matches(xt_connbytes_match, 156 xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg));
162 ARRAY_SIZE(xt_connbytes_match));
163} 157}
164 158
165module_init(xt_connbytes_init); 159module_init(connbytes_mt_init);
166module_exit(xt_connbytes_fini); 160module_exit(connbytes_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 06cff1d13690..3b0111933f60 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -4,7 +4,8 @@
4 * (c) 2000 Gerd Knorr <kraxel@bytesex.org> 4 * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
5 * Nov 2002: Martin Bene <martin.bene@icomedias.com>: 5 * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
6 * only ignore TIME_WAIT or gone connections 6 * only ignore TIME_WAIT or gone connections
7 * Copyright © Jan Engelhardt <jengelh@gmx.de>, 2007 7 * (C) CC Computer Consultants GmbH, 2007
8 * Contact: <jengelh@computergmbh.de>
8 * 9 *
9 * based on ... 10 * based on ...
10 * 11 *
@@ -52,10 +53,10 @@ static inline unsigned int connlimit_iphash(__be32 addr)
52} 53}
53 54
54static inline unsigned int 55static inline unsigned int
55connlimit_iphash6(const union nf_conntrack_address *addr, 56connlimit_iphash6(const union nf_inet_addr *addr,
56 const union nf_conntrack_address *mask) 57 const union nf_inet_addr *mask)
57{ 58{
58 union nf_conntrack_address res; 59 union nf_inet_addr res;
59 unsigned int i; 60 unsigned int i;
60 61
61 if (unlikely(!connlimit_rnd_inited)) { 62 if (unlikely(!connlimit_rnd_inited)) {
@@ -80,14 +81,14 @@ static inline bool already_closed(const struct nf_conn *conn)
80} 81}
81 82
82static inline unsigned int 83static inline unsigned int
83same_source_net(const union nf_conntrack_address *addr, 84same_source_net(const union nf_inet_addr *addr,
84 const union nf_conntrack_address *mask, 85 const union nf_inet_addr *mask,
85 const union nf_conntrack_address *u3, unsigned int family) 86 const union nf_inet_addr *u3, unsigned int family)
86{ 87{
87 if (family == AF_INET) { 88 if (family == AF_INET) {
88 return (addr->ip & mask->ip) == (u3->ip & mask->ip); 89 return (addr->ip & mask->ip) == (u3->ip & mask->ip);
89 } else { 90 } else {
90 union nf_conntrack_address lh, rh; 91 union nf_inet_addr lh, rh;
91 unsigned int i; 92 unsigned int i;
92 93
93 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { 94 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
@@ -101,8 +102,8 @@ same_source_net(const union nf_conntrack_address *addr,
101 102
102static int count_them(struct xt_connlimit_data *data, 103static int count_them(struct xt_connlimit_data *data,
103 const struct nf_conntrack_tuple *tuple, 104 const struct nf_conntrack_tuple *tuple,
104 const union nf_conntrack_address *addr, 105 const union nf_inet_addr *addr,
105 const union nf_conntrack_address *mask, 106 const union nf_inet_addr *mask,
106 const struct xt_match *match) 107 const struct xt_match *match)
107{ 108{
108 struct nf_conntrack_tuple_hash *found; 109 struct nf_conntrack_tuple_hash *found;
@@ -119,11 +120,11 @@ static int count_them(struct xt_connlimit_data *data,
119 else 120 else
120 hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; 121 hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
121 122
122 read_lock_bh(&nf_conntrack_lock); 123 rcu_read_lock();
123 124
124 /* check the saved connections */ 125 /* check the saved connections */
125 list_for_each_entry_safe(conn, tmp, hash, list) { 126 list_for_each_entry_safe(conn, tmp, hash, list) {
126 found = __nf_conntrack_find(&conn->tuple, NULL); 127 found = __nf_conntrack_find(&conn->tuple);
127 found_ct = NULL; 128 found_ct = NULL;
128 129
129 if (found != NULL) 130 if (found != NULL)
@@ -162,7 +163,7 @@ static int count_them(struct xt_connlimit_data *data,
162 ++matches; 163 ++matches;
163 } 164 }
164 165
165 read_unlock_bh(&nf_conntrack_lock); 166 rcu_read_unlock();
166 167
167 if (addit) { 168 if (addit) {
168 /* save the new connection in our list */ 169 /* save the new connection in our list */
@@ -177,15 +178,14 @@ static int count_them(struct xt_connlimit_data *data,
177 return matches; 178 return matches;
178} 179}
179 180
180static bool connlimit_match(const struct sk_buff *skb, 181static bool
181 const struct net_device *in, 182connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
182 const struct net_device *out, 183 const struct net_device *out, const struct xt_match *match,
183 const struct xt_match *match, 184 const void *matchinfo, int offset, unsigned int protoff,
184 const void *matchinfo, int offset, 185 bool *hotdrop)
185 unsigned int protoff, bool *hotdrop)
186{ 186{
187 const struct xt_connlimit_info *info = matchinfo; 187 const struct xt_connlimit_info *info = matchinfo;
188 union nf_conntrack_address addr, mask; 188 union nf_inet_addr addr;
189 struct nf_conntrack_tuple tuple; 189 struct nf_conntrack_tuple tuple;
190 const struct nf_conntrack_tuple *tuple_ptr = &tuple; 190 const struct nf_conntrack_tuple *tuple_ptr = &tuple;
191 enum ip_conntrack_info ctinfo; 191 enum ip_conntrack_info ctinfo;
@@ -202,15 +202,14 @@ static bool connlimit_match(const struct sk_buff *skb,
202 if (match->family == AF_INET6) { 202 if (match->family == AF_INET6) {
203 const struct ipv6hdr *iph = ipv6_hdr(skb); 203 const struct ipv6hdr *iph = ipv6_hdr(skb);
204 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); 204 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
205 memcpy(&mask.ip6, info->v6_mask, sizeof(info->v6_mask));
206 } else { 205 } else {
207 const struct iphdr *iph = ip_hdr(skb); 206 const struct iphdr *iph = ip_hdr(skb);
208 addr.ip = iph->saddr; 207 addr.ip = iph->saddr;
209 mask.ip = info->v4_mask;
210 } 208 }
211 209
212 spin_lock_bh(&info->data->lock); 210 spin_lock_bh(&info->data->lock);
213 connections = count_them(info->data, tuple_ptr, &addr, &mask, match); 211 connections = count_them(info->data, tuple_ptr, &addr,
212 &info->mask, match);
214 spin_unlock_bh(&info->data->lock); 213 spin_unlock_bh(&info->data->lock);
215 214
216 if (connections < 0) { 215 if (connections < 0) {
@@ -226,9 +225,10 @@ static bool connlimit_match(const struct sk_buff *skb,
226 return false; 225 return false;
227} 226}
228 227
229static bool connlimit_check(const char *tablename, const void *ip, 228static bool
230 const struct xt_match *match, void *matchinfo, 229connlimit_mt_check(const char *tablename, const void *ip,
231 unsigned int hook_mask) 230 const struct xt_match *match, void *matchinfo,
231 unsigned int hook_mask)
232{ 232{
233 struct xt_connlimit_info *info = matchinfo; 233 struct xt_connlimit_info *info = matchinfo;
234 unsigned int i; 234 unsigned int i;
@@ -253,7 +253,8 @@ static bool connlimit_check(const char *tablename, const void *ip,
253 return true; 253 return true;
254} 254}
255 255
256static void connlimit_destroy(const struct xt_match *match, void *matchinfo) 256static void
257connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
257{ 258{
258 struct xt_connlimit_info *info = matchinfo; 259 struct xt_connlimit_info *info = matchinfo;
259 struct xt_connlimit_conn *conn; 260 struct xt_connlimit_conn *conn;
@@ -273,41 +274,42 @@ static void connlimit_destroy(const struct xt_match *match, void *matchinfo)
273 kfree(info->data); 274 kfree(info->data);
274} 275}
275 276
276static struct xt_match connlimit_reg[] __read_mostly = { 277static struct xt_match connlimit_mt_reg[] __read_mostly = {
277 { 278 {
278 .name = "connlimit", 279 .name = "connlimit",
279 .family = AF_INET, 280 .family = AF_INET,
280 .checkentry = connlimit_check, 281 .checkentry = connlimit_mt_check,
281 .match = connlimit_match, 282 .match = connlimit_mt,
282 .matchsize = sizeof(struct xt_connlimit_info), 283 .matchsize = sizeof(struct xt_connlimit_info),
283 .destroy = connlimit_destroy, 284 .destroy = connlimit_mt_destroy,
284 .me = THIS_MODULE, 285 .me = THIS_MODULE,
285 }, 286 },
286 { 287 {
287 .name = "connlimit", 288 .name = "connlimit",
288 .family = AF_INET6, 289 .family = AF_INET6,
289 .checkentry = connlimit_check, 290 .checkentry = connlimit_mt_check,
290 .match = connlimit_match, 291 .match = connlimit_mt,
291 .matchsize = sizeof(struct xt_connlimit_info), 292 .matchsize = sizeof(struct xt_connlimit_info),
292 .destroy = connlimit_destroy, 293 .destroy = connlimit_mt_destroy,
293 .me = THIS_MODULE, 294 .me = THIS_MODULE,
294 }, 295 },
295}; 296};
296 297
297static int __init xt_connlimit_init(void) 298static int __init connlimit_mt_init(void)
298{ 299{
299 return xt_register_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); 300 return xt_register_matches(connlimit_mt_reg,
301 ARRAY_SIZE(connlimit_mt_reg));
300} 302}
301 303
302static void __exit xt_connlimit_exit(void) 304static void __exit connlimit_mt_exit(void)
303{ 305{
304 xt_unregister_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); 306 xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
305} 307}
306 308
307module_init(xt_connlimit_init); 309module_init(connlimit_mt_init);
308module_exit(xt_connlimit_exit); 310module_exit(connlimit_mt_exit);
309MODULE_AUTHOR("Jan Engelhardt <jengelh@gmx.de>"); 311MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
310MODULE_DESCRIPTION("netfilter xt_connlimit match module"); 312MODULE_DESCRIPTION("Xtables: Number of connections matching");
311MODULE_LICENSE("GPL"); 313MODULE_LICENSE("GPL");
312MODULE_ALIAS("ipt_connlimit"); 314MODULE_ALIAS("ipt_connlimit");
313MODULE_ALIAS("ip6t_connlimit"); 315MODULE_ALIAS("ip6t_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 9f67920af41f..aaa1b96691f9 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,8 +1,10 @@
1/* This kernel module matches connection mark values set by the 1/*
2 * CONNMARK target 2 * xt_connmark - Netfilter module to match connection mark values
3 * 3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> 4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com> 5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
6 * 8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -26,20 +28,33 @@
26#include <linux/netfilter/xt_connmark.h> 28#include <linux/netfilter/xt_connmark.h>
27 29
28MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); 30MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
29MODULE_DESCRIPTION("IP tables connmark match module"); 31MODULE_DESCRIPTION("Xtables: connection mark match");
30MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
31MODULE_ALIAS("ipt_connmark"); 33MODULE_ALIAS("ipt_connmark");
32MODULE_ALIAS("ip6t_connmark"); 34MODULE_ALIAS("ip6t_connmark");
33 35
34static bool 36static bool
35match(const struct sk_buff *skb, 37connmark_mt(const struct sk_buff *skb, const struct net_device *in,
36 const struct net_device *in, 38 const struct net_device *out, const struct xt_match *match,
37 const struct net_device *out, 39 const void *matchinfo, int offset, unsigned int protoff,
38 const struct xt_match *match, 40 bool *hotdrop)
39 const void *matchinfo, 41{
40 int offset, 42 const struct xt_connmark_mtinfo1 *info = matchinfo;
41 unsigned int protoff, 43 enum ip_conntrack_info ctinfo;
42 bool *hotdrop) 44 const struct nf_conn *ct;
45
46 ct = nf_ct_get(skb, &ctinfo);
47 if (ct == NULL)
48 return false;
49
50 return ((ct->mark & info->mask) == info->mark) ^ info->invert;
51}
52
53static bool
54connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
55 const struct net_device *out, const struct xt_match *match,
56 const void *matchinfo, int offset, unsigned int protoff,
57 bool *hotdrop)
43{ 58{
44 const struct xt_connmark_info *info = matchinfo; 59 const struct xt_connmark_info *info = matchinfo;
45 const struct nf_conn *ct; 60 const struct nf_conn *ct;
@@ -53,11 +68,9 @@ match(const struct sk_buff *skb,
53} 68}
54 69
55static bool 70static bool
56checkentry(const char *tablename, 71connmark_mt_check_v0(const char *tablename, const void *ip,
57 const void *ip, 72 const struct xt_match *match, void *matchinfo,
58 const struct xt_match *match, 73 unsigned int hook_mask)
59 void *matchinfo,
60 unsigned int hook_mask)
61{ 74{
62 const struct xt_connmark_info *cm = matchinfo; 75 const struct xt_connmark_info *cm = matchinfo;
63 76
@@ -67,14 +80,27 @@ checkentry(const char *tablename,
67 } 80 }
68 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 81 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
69 printk(KERN_WARNING "can't load conntrack support for " 82 printk(KERN_WARNING "can't load conntrack support for "
70 "proto=%d\n", match->family); 83 "proto=%u\n", match->family);
84 return false;
85 }
86 return true;
87}
88
89static bool
90connmark_mt_check(const char *tablename, const void *ip,
91 const struct xt_match *match, void *matchinfo,
92 unsigned int hook_mask)
93{
94 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
95 printk(KERN_WARNING "cannot load conntrack support for "
96 "proto=%u\n", match->family);
71 return false; 97 return false;
72 } 98 }
73 return true; 99 return true;
74} 100}
75 101
76static void 102static void
77destroy(const struct xt_match *match, void *matchinfo) 103connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
78{ 104{
79 nf_ct_l3proto_module_put(match->family); 105 nf_ct_l3proto_module_put(match->family);
80} 106}
@@ -87,7 +113,7 @@ struct compat_xt_connmark_info {
87 u_int16_t __pad2; 113 u_int16_t __pad2;
88}; 114};
89 115
90static void compat_from_user(void *dst, void *src) 116static void connmark_mt_compat_from_user_v0(void *dst, void *src)
91{ 117{
92 const struct compat_xt_connmark_info *cm = src; 118 const struct compat_xt_connmark_info *cm = src;
93 struct xt_connmark_info m = { 119 struct xt_connmark_info m = {
@@ -98,7 +124,7 @@ static void compat_from_user(void *dst, void *src)
98 memcpy(dst, &m, sizeof(m)); 124 memcpy(dst, &m, sizeof(m));
99} 125}
100 126
101static int compat_to_user(void __user *dst, void *src) 127static int connmark_mt_compat_to_user_v0(void __user *dst, void *src)
102{ 128{
103 const struct xt_connmark_info *m = src; 129 const struct xt_connmark_info *m = src;
104 struct compat_xt_connmark_info cm = { 130 struct compat_xt_connmark_info cm = {
@@ -110,42 +136,69 @@ static int compat_to_user(void __user *dst, void *src)
110} 136}
111#endif /* CONFIG_COMPAT */ 137#endif /* CONFIG_COMPAT */
112 138
113static struct xt_match xt_connmark_match[] __read_mostly = { 139static struct xt_match connmark_mt_reg[] __read_mostly = {
114 { 140 {
115 .name = "connmark", 141 .name = "connmark",
142 .revision = 0,
116 .family = AF_INET, 143 .family = AF_INET,
117 .checkentry = checkentry, 144 .checkentry = connmark_mt_check_v0,
118 .match = match, 145 .match = connmark_mt_v0,
119 .destroy = destroy, 146 .destroy = connmark_mt_destroy,
120 .matchsize = sizeof(struct xt_connmark_info), 147 .matchsize = sizeof(struct xt_connmark_info),
121#ifdef CONFIG_COMPAT 148#ifdef CONFIG_COMPAT
122 .compatsize = sizeof(struct compat_xt_connmark_info), 149 .compatsize = sizeof(struct compat_xt_connmark_info),
123 .compat_from_user = compat_from_user, 150 .compat_from_user = connmark_mt_compat_from_user_v0,
124 .compat_to_user = compat_to_user, 151 .compat_to_user = connmark_mt_compat_to_user_v0,
125#endif 152#endif
126 .me = THIS_MODULE 153 .me = THIS_MODULE
127 }, 154 },
128 { 155 {
129 .name = "connmark", 156 .name = "connmark",
157 .revision = 0,
130 .family = AF_INET6, 158 .family = AF_INET6,
131 .checkentry = checkentry, 159 .checkentry = connmark_mt_check_v0,
132 .match = match, 160 .match = connmark_mt_v0,
133 .destroy = destroy, 161 .destroy = connmark_mt_destroy,
134 .matchsize = sizeof(struct xt_connmark_info), 162 .matchsize = sizeof(struct xt_connmark_info),
163#ifdef CONFIG_COMPAT
164 .compatsize = sizeof(struct compat_xt_connmark_info),
165 .compat_from_user = connmark_mt_compat_from_user_v0,
166 .compat_to_user = connmark_mt_compat_to_user_v0,
167#endif
135 .me = THIS_MODULE 168 .me = THIS_MODULE
136 }, 169 },
170 {
171 .name = "connmark",
172 .revision = 1,
173 .family = AF_INET,
174 .checkentry = connmark_mt_check,
175 .match = connmark_mt,
176 .matchsize = sizeof(struct xt_connmark_mtinfo1),
177 .destroy = connmark_mt_destroy,
178 .me = THIS_MODULE,
179 },
180 {
181 .name = "connmark",
182 .revision = 1,
183 .family = AF_INET6,
184 .checkentry = connmark_mt_check,
185 .match = connmark_mt,
186 .matchsize = sizeof(struct xt_connmark_mtinfo1),
187 .destroy = connmark_mt_destroy,
188 .me = THIS_MODULE,
189 },
137}; 190};
138 191
139static int __init xt_connmark_init(void) 192static int __init connmark_mt_init(void)
140{ 193{
141 return xt_register_matches(xt_connmark_match, 194 return xt_register_matches(connmark_mt_reg,
142 ARRAY_SIZE(xt_connmark_match)); 195 ARRAY_SIZE(connmark_mt_reg));
143} 196}
144 197
145static void __exit xt_connmark_fini(void) 198static void __exit connmark_mt_exit(void)
146{ 199{
147 xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); 200 xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg));
148} 201}
149 202
150module_init(xt_connmark_init); 203module_init(connmark_mt_init);
151module_exit(xt_connmark_fini); 204module_exit(connmark_mt_exit);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index ca4b69f020a8..85330856a29c 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -1,33 +1,34 @@
1/* Kernel module to match connection tracking information. 1/*
2 * Superset of Rusty's minimalistic state match. 2 * xt_conntrack - Netfilter module to match connection tracking
3 * information. (Superset of Rusty's minimalistic state match.)
3 * 4 *
4 * (C) 2001 Marc Boucher (marc@mbsi.ca). 5 * (C) 2001 Marc Boucher (marc@mbsi.ca).
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
5 * 7 *
6 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
9 */ 11 */
10 12
11#include <linux/module.h> 13#include <linux/module.h>
12#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <net/ipv6.h>
13#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter/xt_conntrack.h> 17#include <linux/netfilter/xt_conntrack.h>
15#include <net/netfilter/nf_conntrack.h> 18#include <net/netfilter/nf_conntrack.h>
16 19
17MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
19MODULE_DESCRIPTION("iptables connection tracking match module"); 22MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
23MODULE_DESCRIPTION("Xtables: connection tracking state match");
20MODULE_ALIAS("ipt_conntrack"); 24MODULE_ALIAS("ipt_conntrack");
25MODULE_ALIAS("ip6t_conntrack");
21 26
22static bool 27static bool
23match(const struct sk_buff *skb, 28conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 29 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 30 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 31 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 32{
32 const struct xt_conntrack_info *sinfo = matchinfo; 33 const struct xt_conntrack_info *sinfo = matchinfo;
33 const struct nf_conn *ct; 34 const struct nf_conn *ct;
@@ -36,7 +37,7 @@ match(const struct sk_buff *skb,
36 37
37 ct = nf_ct_get(skb, &ctinfo); 38 ct = nf_ct_get(skb, &ctinfo);
38 39
39#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) 40#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg)))
40 41
41 if (ct == &nf_conntrack_untracked) 42 if (ct == &nf_conntrack_untracked)
42 statebit = XT_CONNTRACK_STATE_UNTRACKED; 43 statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -112,24 +113,192 @@ match(const struct sk_buff *skb,
112 return false; 113 return false;
113 } 114 }
114 return true; 115 return true;
116#undef FWINV
115} 117}
116 118
117static bool 119static bool
118checkentry(const char *tablename, 120conntrack_addrcmp(const union nf_inet_addr *kaddr,
119 const void *ip, 121 const union nf_inet_addr *uaddr,
120 const struct xt_match *match, 122 const union nf_inet_addr *umask, unsigned int l3proto)
121 void *matchinfo, 123{
122 unsigned int hook_mask) 124 if (l3proto == AF_INET)
125 return (kaddr->ip & umask->ip) == uaddr->ip;
126 else if (l3proto == AF_INET6)
127 return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
128 &uaddr->in6) == 0;
129 else
130 return false;
131}
132
133static inline bool
134conntrack_mt_origsrc(const struct nf_conn *ct,
135 const struct xt_conntrack_mtinfo1 *info,
136 unsigned int family)
137{
138 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
139 &info->origsrc_addr, &info->origsrc_mask, family);
140}
141
142static inline bool
143conntrack_mt_origdst(const struct nf_conn *ct,
144 const struct xt_conntrack_mtinfo1 *info,
145 unsigned int family)
146{
147 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
148 &info->origdst_addr, &info->origdst_mask, family);
149}
150
151static inline bool
152conntrack_mt_replsrc(const struct nf_conn *ct,
153 const struct xt_conntrack_mtinfo1 *info,
154 unsigned int family)
155{
156 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
157 &info->replsrc_addr, &info->replsrc_mask, family);
158}
159
160static inline bool
161conntrack_mt_repldst(const struct nf_conn *ct,
162 const struct xt_conntrack_mtinfo1 *info,
163 unsigned int family)
164{
165 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
166 &info->repldst_addr, &info->repldst_mask, family);
167}
168
169static inline bool
170ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info,
171 const struct nf_conn *ct)
172{
173 const struct nf_conntrack_tuple *tuple;
174
175 tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
176 if ((info->match_flags & XT_CONNTRACK_PROTO) &&
177 (tuple->dst.protonum == info->l4proto) ^
178 !(info->invert_flags & XT_CONNTRACK_PROTO))
179 return false;
180
181 /* Shortcut to match all recognized protocols by using ->src.all. */
182 if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
183 (tuple->src.u.all == info->origsrc_port) ^
184 !(info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
185 return false;
186
187 if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
188 (tuple->dst.u.all == info->origdst_port) ^
189 !(info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
190 return false;
191
192 tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
193
194 if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
195 (tuple->src.u.all == info->replsrc_port) ^
196 !(info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
197 return false;
198
199 if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
200 (tuple->dst.u.all == info->repldst_port) ^
201 !(info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
202 return false;
203
204 return true;
205}
206
207static bool
208conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
209 const struct net_device *out, const struct xt_match *match,
210 const void *matchinfo, int offset, unsigned int protoff,
211 bool *hotdrop)
212{
213 const struct xt_conntrack_mtinfo1 *info = matchinfo;
214 enum ip_conntrack_info ctinfo;
215 const struct nf_conn *ct;
216 unsigned int statebit;
217
218 ct = nf_ct_get(skb, &ctinfo);
219
220 if (ct == &nf_conntrack_untracked)
221 statebit = XT_CONNTRACK_STATE_UNTRACKED;
222 else if (ct != NULL)
223 statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
224 else
225 statebit = XT_CONNTRACK_STATE_INVALID;
226
227 if (info->match_flags & XT_CONNTRACK_STATE) {
228 if (ct != NULL) {
229 if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
230 statebit |= XT_CONNTRACK_STATE_SNAT;
231 if (test_bit(IPS_DST_NAT_BIT, &ct->status))
232 statebit |= XT_CONNTRACK_STATE_DNAT;
233 }
234 if ((info->state_mask & statebit) ^
235 !(info->invert_flags & XT_CONNTRACK_STATE))
236 return false;
237 }
238
239 if (ct == NULL)
240 return info->match_flags & XT_CONNTRACK_STATE;
241 if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
242 (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
243 !!(info->invert_flags & XT_CONNTRACK_DIRECTION))
244 return false;
245
246 if (info->match_flags & XT_CONNTRACK_ORIGSRC)
247 if (conntrack_mt_origsrc(ct, info, match->family) ^
248 !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
249 return false;
250
251 if (info->match_flags & XT_CONNTRACK_ORIGDST)
252 if (conntrack_mt_origdst(ct, info, match->family) ^
253 !(info->invert_flags & XT_CONNTRACK_ORIGDST))
254 return false;
255
256 if (info->match_flags & XT_CONNTRACK_REPLSRC)
257 if (conntrack_mt_replsrc(ct, info, match->family) ^
258 !(info->invert_flags & XT_CONNTRACK_REPLSRC))
259 return false;
260
261 if (info->match_flags & XT_CONNTRACK_REPLDST)
262 if (conntrack_mt_repldst(ct, info, match->family) ^
263 !(info->invert_flags & XT_CONNTRACK_REPLDST))
264 return false;
265
266 if (!ct_proto_port_check(info, ct))
267 return false;
268
269 if ((info->match_flags & XT_CONNTRACK_STATUS) &&
270 (!!(info->status_mask & ct->status) ^
271 !(info->invert_flags & XT_CONNTRACK_STATUS)))
272 return false;
273
274 if (info->match_flags & XT_CONNTRACK_EXPIRES) {
275 unsigned long expires = 0;
276
277 if (timer_pending(&ct->timeout))
278 expires = (ct->timeout.expires - jiffies) / HZ;
279 if ((expires >= info->expires_min &&
280 expires <= info->expires_max) ^
281 !(info->invert_flags & XT_CONNTRACK_EXPIRES))
282 return false;
283 }
284 return true;
285}
286
287static bool
288conntrack_mt_check(const char *tablename, const void *ip,
289 const struct xt_match *match, void *matchinfo,
290 unsigned int hook_mask)
123{ 291{
124 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 292 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
125 printk(KERN_WARNING "can't load conntrack support for " 293 printk(KERN_WARNING "can't load conntrack support for "
126 "proto=%d\n", match->family); 294 "proto=%u\n", match->family);
127 return false; 295 return false;
128 } 296 }
129 return true; 297 return true;
130} 298}
131 299
132static void destroy(const struct xt_match *match, void *matchinfo) 300static void
301conntrack_mt_destroy(const struct xt_match *match, void *matchinfo)
133{ 302{
134 nf_ct_l3proto_module_put(match->family); 303 nf_ct_l3proto_module_put(match->family);
135} 304}
@@ -148,7 +317,7 @@ struct compat_xt_conntrack_info
148 u_int8_t invflags; 317 u_int8_t invflags;
149}; 318};
150 319
151static void compat_from_user(void *dst, void *src) 320static void conntrack_mt_compat_from_user_v0(void *dst, void *src)
152{ 321{
153 const struct compat_xt_conntrack_info *cm = src; 322 const struct compat_xt_conntrack_info *cm = src;
154 struct xt_conntrack_info m = { 323 struct xt_conntrack_info m = {
@@ -165,7 +334,7 @@ static void compat_from_user(void *dst, void *src)
165 memcpy(dst, &m, sizeof(m)); 334 memcpy(dst, &m, sizeof(m));
166} 335}
167 336
168static int compat_to_user(void __user *dst, void *src) 337static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src)
169{ 338{
170 const struct xt_conntrack_info *m = src; 339 const struct xt_conntrack_info *m = src;
171 struct compat_xt_conntrack_info cm = { 340 struct compat_xt_conntrack_info cm = {
@@ -183,30 +352,54 @@ static int compat_to_user(void __user *dst, void *src)
183} 352}
184#endif 353#endif
185 354
186static struct xt_match conntrack_match __read_mostly = { 355static struct xt_match conntrack_mt_reg[] __read_mostly = {
187 .name = "conntrack", 356 {
188 .match = match, 357 .name = "conntrack",
189 .checkentry = checkentry, 358 .revision = 0,
190 .destroy = destroy, 359 .family = AF_INET,
191 .matchsize = sizeof(struct xt_conntrack_info), 360 .match = conntrack_mt_v0,
361 .checkentry = conntrack_mt_check,
362 .destroy = conntrack_mt_destroy,
363 .matchsize = sizeof(struct xt_conntrack_info),
364 .me = THIS_MODULE,
192#ifdef CONFIG_COMPAT 365#ifdef CONFIG_COMPAT
193 .compatsize = sizeof(struct compat_xt_conntrack_info), 366 .compatsize = sizeof(struct compat_xt_conntrack_info),
194 .compat_from_user = compat_from_user, 367 .compat_from_user = conntrack_mt_compat_from_user_v0,
195 .compat_to_user = compat_to_user, 368 .compat_to_user = conntrack_mt_compat_to_user_v0,
196#endif 369#endif
197 .family = AF_INET, 370 },
198 .me = THIS_MODULE, 371 {
372 .name = "conntrack",
373 .revision = 1,
374 .family = AF_INET,
375 .matchsize = sizeof(struct xt_conntrack_mtinfo1),
376 .match = conntrack_mt,
377 .checkentry = conntrack_mt_check,
378 .destroy = conntrack_mt_destroy,
379 .me = THIS_MODULE,
380 },
381 {
382 .name = "conntrack",
383 .revision = 1,
384 .family = AF_INET6,
385 .matchsize = sizeof(struct xt_conntrack_mtinfo1),
386 .match = conntrack_mt,
387 .checkentry = conntrack_mt_check,
388 .destroy = conntrack_mt_destroy,
389 .me = THIS_MODULE,
390 },
199}; 391};
200 392
201static int __init xt_conntrack_init(void) 393static int __init conntrack_mt_init(void)
202{ 394{
203 return xt_register_match(&conntrack_match); 395 return xt_register_matches(conntrack_mt_reg,
396 ARRAY_SIZE(conntrack_mt_reg));
204} 397}
205 398
206static void __exit xt_conntrack_fini(void) 399static void __exit conntrack_mt_exit(void)
207{ 400{
208 xt_unregister_match(&conntrack_match); 401 xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg));
209} 402}
210 403
211module_init(xt_conntrack_init); 404module_init(conntrack_mt_init);
212module_exit(xt_conntrack_fini); 405module_exit(conntrack_mt_exit);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index c2b1b24ee335..667f45e72cd9 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -22,7 +22,7 @@
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 24MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
25MODULE_DESCRIPTION("Match for DCCP protocol packets"); 25MODULE_DESCRIPTION("Xtables: DCCP protocol packet match");
26MODULE_ALIAS("ipt_dccp"); 26MODULE_ALIAS("ipt_dccp");
27MODULE_ALIAS("ip6t_dccp"); 27MODULE_ALIAS("ip6t_dccp");
28 28
@@ -93,14 +93,9 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
93} 93}
94 94
95static bool 95static bool
96match(const struct sk_buff *skb, 96dccp_mt(const struct sk_buff *skb, const struct net_device *in,
97 const struct net_device *in, 97 const struct net_device *out, const struct xt_match *match,
98 const struct net_device *out, 98 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
99 const struct xt_match *match,
100 const void *matchinfo,
101 int offset,
102 unsigned int protoff,
103 bool *hotdrop)
104{ 99{
105 const struct xt_dccp_info *info = matchinfo; 100 const struct xt_dccp_info *info = matchinfo;
106 struct dccp_hdr _dh, *dh; 101 struct dccp_hdr _dh, *dh;
@@ -128,11 +123,9 @@ match(const struct sk_buff *skb,
128} 123}
129 124
130static bool 125static bool
131checkentry(const char *tablename, 126dccp_mt_check(const char *tablename, const void *inf,
132 const void *inf, 127 const struct xt_match *match, void *matchinfo,
133 const struct xt_match *match, 128 unsigned int hook_mask)
134 void *matchinfo,
135 unsigned int hook_mask)
136{ 129{
137 const struct xt_dccp_info *info = matchinfo; 130 const struct xt_dccp_info *info = matchinfo;
138 131
@@ -141,12 +134,12 @@ checkentry(const char *tablename,
141 && !(info->invflags & ~info->flags); 134 && !(info->invflags & ~info->flags);
142} 135}
143 136
144static struct xt_match xt_dccp_match[] __read_mostly = { 137static struct xt_match dccp_mt_reg[] __read_mostly = {
145 { 138 {
146 .name = "dccp", 139 .name = "dccp",
147 .family = AF_INET, 140 .family = AF_INET,
148 .checkentry = checkentry, 141 .checkentry = dccp_mt_check,
149 .match = match, 142 .match = dccp_mt,
150 .matchsize = sizeof(struct xt_dccp_info), 143 .matchsize = sizeof(struct xt_dccp_info),
151 .proto = IPPROTO_DCCP, 144 .proto = IPPROTO_DCCP,
152 .me = THIS_MODULE, 145 .me = THIS_MODULE,
@@ -154,15 +147,15 @@ static struct xt_match xt_dccp_match[] __read_mostly = {
154 { 147 {
155 .name = "dccp", 148 .name = "dccp",
156 .family = AF_INET6, 149 .family = AF_INET6,
157 .checkentry = checkentry, 150 .checkentry = dccp_mt_check,
158 .match = match, 151 .match = dccp_mt,
159 .matchsize = sizeof(struct xt_dccp_info), 152 .matchsize = sizeof(struct xt_dccp_info),
160 .proto = IPPROTO_DCCP, 153 .proto = IPPROTO_DCCP,
161 .me = THIS_MODULE, 154 .me = THIS_MODULE,
162 }, 155 },
163}; 156};
164 157
165static int __init xt_dccp_init(void) 158static int __init dccp_mt_init(void)
166{ 159{
167 int ret; 160 int ret;
168 161
@@ -172,7 +165,7 @@ static int __init xt_dccp_init(void)
172 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); 165 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
173 if (!dccp_optbuf) 166 if (!dccp_optbuf)
174 return -ENOMEM; 167 return -ENOMEM;
175 ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); 168 ret = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
176 if (ret) 169 if (ret)
177 goto out_kfree; 170 goto out_kfree;
178 return ret; 171 return ret;
@@ -182,11 +175,11 @@ out_kfree:
182 return ret; 175 return ret;
183} 176}
184 177
185static void __exit xt_dccp_fini(void) 178static void __exit dccp_mt_exit(void)
186{ 179{
187 xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); 180 xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
188 kfree(dccp_optbuf); 181 kfree(dccp_optbuf);
189} 182}
190 183
191module_init(xt_dccp_init); 184module_init(dccp_mt_init);
192module_exit(xt_dccp_fini); 185module_exit(dccp_mt_exit);
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index dde6d66e0d33..26f4aab9c429 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -13,23 +13,22 @@
13#include <linux/ipv6.h> 13#include <linux/ipv6.h>
14#include <net/dsfield.h> 14#include <net/dsfield.h>
15 15
16#include <linux/netfilter/xt_dscp.h>
17#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_dscp.h>
18#include <linux/netfilter_ipv4/ipt_tos.h>
18 19
19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 20MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
20MODULE_DESCRIPTION("x_tables DSCP matching module"); 21MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
21MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
22MODULE_ALIAS("ipt_dscp"); 23MODULE_ALIAS("ipt_dscp");
23MODULE_ALIAS("ip6t_dscp"); 24MODULE_ALIAS("ip6t_dscp");
25MODULE_ALIAS("ipt_tos");
26MODULE_ALIAS("ip6t_tos");
24 27
25static bool match(const struct sk_buff *skb, 28static bool
26 const struct net_device *in, 29dscp_mt(const struct sk_buff *skb, const struct net_device *in,
27 const struct net_device *out, 30 const struct net_device *out, const struct xt_match *match,
28 const struct xt_match *match, 31 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
29 const void *matchinfo,
30 int offset,
31 unsigned int protoff,
32 bool *hotdrop)
33{ 32{
34 const struct xt_dscp_info *info = matchinfo; 33 const struct xt_dscp_info *info = matchinfo;
35 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 34 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -37,14 +36,11 @@ static bool match(const struct sk_buff *skb,
37 return (dscp == info->dscp) ^ !!info->invert; 36 return (dscp == info->dscp) ^ !!info->invert;
38} 37}
39 38
40static bool match6(const struct sk_buff *skb, 39static bool
41 const struct net_device *in, 40dscp_mt6(const struct sk_buff *skb, const struct net_device *in,
42 const struct net_device *out, 41 const struct net_device *out, const struct xt_match *match,
43 const struct xt_match *match, 42 const void *matchinfo, int offset, unsigned int protoff,
44 const void *matchinfo, 43 bool *hotdrop)
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 44{
49 const struct xt_dscp_info *info = matchinfo; 45 const struct xt_dscp_info *info = matchinfo;
50 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 46 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -52,11 +48,10 @@ static bool match6(const struct sk_buff *skb,
52 return (dscp == info->dscp) ^ !!info->invert; 48 return (dscp == info->dscp) ^ !!info->invert;
53} 49}
54 50
55static bool checkentry(const char *tablename, 51static bool
56 const void *info, 52dscp_mt_check(const char *tablename, const void *info,
57 const struct xt_match *match, 53 const struct xt_match *match, void *matchinfo,
58 void *matchinfo, 54 unsigned int hook_mask)
59 unsigned int hook_mask)
60{ 55{
61 const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; 56 const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
62 57
@@ -68,34 +63,83 @@ static bool checkentry(const char *tablename,
68 return true; 63 return true;
69} 64}
70 65
71static struct xt_match xt_dscp_match[] __read_mostly = { 66static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
67 const struct net_device *out,
68 const struct xt_match *match, const void *matchinfo,
69 int offset, unsigned int protoff, bool *hotdrop)
70{
71 const struct ipt_tos_info *info = matchinfo;
72
73 return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
74}
75
76static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
77 const struct net_device *out, const struct xt_match *match,
78 const void *matchinfo, int offset, unsigned int protoff,
79 bool *hotdrop)
80{
81 const struct xt_tos_match_info *info = matchinfo;
82
83 if (match->family == AF_INET)
84 return ((ip_hdr(skb)->tos & info->tos_mask) ==
85 info->tos_value) ^ !!info->invert;
86 else
87 return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) ==
88 info->tos_value) ^ !!info->invert;
89}
90
91static struct xt_match dscp_mt_reg[] __read_mostly = {
72 { 92 {
73 .name = "dscp", 93 .name = "dscp",
74 .family = AF_INET, 94 .family = AF_INET,
75 .checkentry = checkentry, 95 .checkentry = dscp_mt_check,
76 .match = match, 96 .match = dscp_mt,
77 .matchsize = sizeof(struct xt_dscp_info), 97 .matchsize = sizeof(struct xt_dscp_info),
78 .me = THIS_MODULE, 98 .me = THIS_MODULE,
79 }, 99 },
80 { 100 {
81 .name = "dscp", 101 .name = "dscp",
82 .family = AF_INET6, 102 .family = AF_INET6,
83 .checkentry = checkentry, 103 .checkentry = dscp_mt_check,
84 .match = match6, 104 .match = dscp_mt6,
85 .matchsize = sizeof(struct xt_dscp_info), 105 .matchsize = sizeof(struct xt_dscp_info),
86 .me = THIS_MODULE, 106 .me = THIS_MODULE,
87 }, 107 },
108 {
109 .name = "tos",
110 .revision = 0,
111 .family = AF_INET,
112 .match = tos_mt_v0,
113 .matchsize = sizeof(struct ipt_tos_info),
114 .me = THIS_MODULE,
115 },
116 {
117 .name = "tos",
118 .revision = 1,
119 .family = AF_INET,
120 .match = tos_mt,
121 .matchsize = sizeof(struct xt_tos_match_info),
122 .me = THIS_MODULE,
123 },
124 {
125 .name = "tos",
126 .revision = 1,
127 .family = AF_INET6,
128 .match = tos_mt,
129 .matchsize = sizeof(struct xt_tos_match_info),
130 .me = THIS_MODULE,
131 },
88}; 132};
89 133
90static int __init xt_dscp_match_init(void) 134static int __init dscp_mt_init(void)
91{ 135{
92 return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); 136 return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
93} 137}
94 138
95static void __exit xt_dscp_match_fini(void) 139static void __exit dscp_mt_exit(void)
96{ 140{
97 xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); 141 xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
98} 142}
99 143
100module_init(xt_dscp_match_init); 144module_init(dscp_mt_init);
101module_exit(xt_dscp_match_fini); 145module_exit(dscp_mt_exit);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index b11378e001b6..71c7c3785266 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -20,7 +20,7 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); 22MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
23MODULE_DESCRIPTION("x_tables ESP SPI match module"); 23MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
24MODULE_ALIAS("ipt_esp"); 24MODULE_ALIAS("ipt_esp");
25MODULE_ALIAS("ip6t_esp"); 25MODULE_ALIAS("ip6t_esp");
26 26
@@ -43,14 +43,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
43} 43}
44 44
45static bool 45static bool
46match(const struct sk_buff *skb, 46esp_mt(const struct sk_buff *skb, const struct net_device *in,
47 const struct net_device *in, 47 const struct net_device *out, const struct xt_match *match,
48 const struct net_device *out, 48 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
49 const struct xt_match *match,
50 const void *matchinfo,
51 int offset,
52 unsigned int protoff,
53 bool *hotdrop)
54{ 49{
55 struct ip_esp_hdr _esp, *eh; 50 struct ip_esp_hdr _esp, *eh;
56 const struct xt_esp *espinfo = matchinfo; 51 const struct xt_esp *espinfo = matchinfo;
@@ -75,11 +70,9 @@ match(const struct sk_buff *skb,
75 70
76/* Called when user tries to insert an entry of this type. */ 71/* Called when user tries to insert an entry of this type. */
77static bool 72static bool
78checkentry(const char *tablename, 73esp_mt_check(const char *tablename, const void *ip_void,
79 const void *ip_void, 74 const struct xt_match *match, void *matchinfo,
80 const struct xt_match *match, 75 unsigned int hook_mask)
81 void *matchinfo,
82 unsigned int hook_mask)
83{ 76{
84 const struct xt_esp *espinfo = matchinfo; 77 const struct xt_esp *espinfo = matchinfo;
85 78
@@ -91,12 +84,12 @@ checkentry(const char *tablename,
91 return true; 84 return true;
92} 85}
93 86
94static struct xt_match xt_esp_match[] __read_mostly = { 87static struct xt_match esp_mt_reg[] __read_mostly = {
95 { 88 {
96 .name = "esp", 89 .name = "esp",
97 .family = AF_INET, 90 .family = AF_INET,
98 .checkentry = checkentry, 91 .checkentry = esp_mt_check,
99 .match = match, 92 .match = esp_mt,
100 .matchsize = sizeof(struct xt_esp), 93 .matchsize = sizeof(struct xt_esp),
101 .proto = IPPROTO_ESP, 94 .proto = IPPROTO_ESP,
102 .me = THIS_MODULE, 95 .me = THIS_MODULE,
@@ -104,23 +97,23 @@ static struct xt_match xt_esp_match[] __read_mostly = {
104 { 97 {
105 .name = "esp", 98 .name = "esp",
106 .family = AF_INET6, 99 .family = AF_INET6,
107 .checkentry = checkentry, 100 .checkentry = esp_mt_check,
108 .match = match, 101 .match = esp_mt,
109 .matchsize = sizeof(struct xt_esp), 102 .matchsize = sizeof(struct xt_esp),
110 .proto = IPPROTO_ESP, 103 .proto = IPPROTO_ESP,
111 .me = THIS_MODULE, 104 .me = THIS_MODULE,
112 }, 105 },
113}; 106};
114 107
115static int __init xt_esp_init(void) 108static int __init esp_mt_init(void)
116{ 109{
117 return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); 110 return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
118} 111}
119 112
120static void __exit xt_esp_cleanup(void) 113static void __exit esp_mt_exit(void)
121{ 114{
122 xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); 115 xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
123} 116}
124 117
125module_init(xt_esp_init); 118module_init(esp_mt_init);
126module_exit(xt_esp_cleanup); 119module_exit(esp_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 19103678bf20..744c7f2ab0b1 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,9 +1,9 @@
1/* iptables match extension to limit the number of packets per second 1/*
2 * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) 2 * xt_hashlimit - Netfilter module to limit the number of packets per time
3 * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
3 * 4 *
4 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> 5 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
5 * 6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
7 * 7 *
8 * Development of this code was funded by Astaro AG, http://www.astaro.com/ 8 * Development of this code was funded by Astaro AG, http://www.astaro.com/
9 */ 9 */
@@ -20,7 +20,11 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/in.h> 21#include <linux/in.h>
22#include <linux/ip.h> 22#include <linux/ip.h>
23#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
23#include <linux/ipv6.h> 24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26#endif
27
24#include <net/net_namespace.h> 28#include <net/net_namespace.h>
25 29
26#include <linux/netfilter/x_tables.h> 30#include <linux/netfilter/x_tables.h>
@@ -31,7 +35,8 @@
31 35
32MODULE_LICENSE("GPL"); 36MODULE_LICENSE("GPL");
33MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 37MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
34MODULE_DESCRIPTION("iptables match for limiting per hash-bucket"); 38MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
39MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
35MODULE_ALIAS("ipt_hashlimit"); 40MODULE_ALIAS("ipt_hashlimit");
36MODULE_ALIAS("ip6t_hashlimit"); 41MODULE_ALIAS("ip6t_hashlimit");
37 42
@@ -47,11 +52,13 @@ struct dsthash_dst {
47 __be32 src; 52 __be32 src;
48 __be32 dst; 53 __be32 dst;
49 } ip; 54 } ip;
55#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
50 struct { 56 struct {
51 __be32 src[4]; 57 __be32 src[4];
52 __be32 dst[4]; 58 __be32 dst[4];
53 } ip6; 59 } ip6;
54 } addr; 60#endif
61 };
55 __be16 src_port; 62 __be16 src_port;
56 __be16 dst_port; 63 __be16 dst_port;
57}; 64};
@@ -75,7 +82,7 @@ struct xt_hashlimit_htable {
75 atomic_t use; 82 atomic_t use;
76 int family; 83 int family;
77 84
78 struct hashlimit_cfg cfg; /* config */ 85 struct hashlimit_cfg1 cfg; /* config */
79 86
80 /* used internally */ 87 /* used internally */
81 spinlock_t lock; /* lock for list_head */ 88 spinlock_t lock; /* lock for list_head */
@@ -104,7 +111,16 @@ static inline bool dst_cmp(const struct dsthash_ent *ent,
104static u_int32_t 111static u_int32_t
105hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) 112hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
106{ 113{
107 return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size; 114 u_int32_t hash = jhash2((const u32 *)dst,
115 sizeof(*dst)/sizeof(u32),
116 ht->rnd);
117 /*
118 * Instead of returning hash % ht->cfg.size (implying a divide)
119 * we return the high 32 bits of the (hash * ht->cfg.size) that will
120 * give results between [0 and cfg.size-1] and same hash distribution,
121 * but using a multiply, less expensive than a divide
122 */
123 return ((u64)hash * ht->cfg.size) >> 32;
108} 124}
109 125
110static struct dsthash_ent * 126static struct dsthash_ent *
@@ -169,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
169} 185}
170static void htable_gc(unsigned long htlong); 186static void htable_gc(unsigned long htlong);
171 187
172static int htable_create(struct xt_hashlimit_info *minfo, int family) 188static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
173{ 189{
174 struct xt_hashlimit_htable *hinfo; 190 struct xt_hashlimit_htable *hinfo;
175 unsigned int size; 191 unsigned int size;
@@ -195,7 +211,18 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
195 minfo->hinfo = hinfo; 211 minfo->hinfo = hinfo;
196 212
197 /* copy match config into hashtable config */ 213 /* copy match config into hashtable config */
198 memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); 214 hinfo->cfg.mode = minfo->cfg.mode;
215 hinfo->cfg.avg = minfo->cfg.avg;
216 hinfo->cfg.burst = minfo->cfg.burst;
217 hinfo->cfg.max = minfo->cfg.max;
218 hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
219 hinfo->cfg.expire = minfo->cfg.expire;
220
221 if (family == AF_INET)
222 hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
223 else
224 hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
225
199 hinfo->cfg.size = size; 226 hinfo->cfg.size = size;
200 if (!hinfo->cfg.max) 227 if (!hinfo->cfg.max)
201 hinfo->cfg.max = 8 * hinfo->cfg.size; 228 hinfo->cfg.max = 8 * hinfo->cfg.size;
@@ -231,6 +258,70 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
231 return 0; 258 return 0;
232} 259}
233 260
261static int htable_create(struct xt_hashlimit_mtinfo1 *minfo,
262 unsigned int family)
263{
264 struct xt_hashlimit_htable *hinfo;
265 unsigned int size;
266 unsigned int i;
267
268 if (minfo->cfg.size) {
269 size = minfo->cfg.size;
270 } else {
271 size = (num_physpages << PAGE_SHIFT) / 16384 /
272 sizeof(struct list_head);
273 if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE)
274 size = 8192;
275 if (size < 16)
276 size = 16;
277 }
278 /* FIXME: don't use vmalloc() here or anywhere else -HW */
279 hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
280 sizeof(struct list_head) * size);
281 if (hinfo == NULL) {
282 printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
283 return -1;
284 }
285 minfo->hinfo = hinfo;
286
287 /* copy match config into hashtable config */
288 memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
289 hinfo->cfg.size = size;
290 if (hinfo->cfg.max == 0)
291 hinfo->cfg.max = 8 * hinfo->cfg.size;
292 else if (hinfo->cfg.max < hinfo->cfg.size)
293 hinfo->cfg.max = hinfo->cfg.size;
294
295 for (i = 0; i < hinfo->cfg.size; i++)
296 INIT_HLIST_HEAD(&hinfo->hash[i]);
297
298 atomic_set(&hinfo->use, 1);
299 hinfo->count = 0;
300 hinfo->family = family;
301 hinfo->rnd_initialized = 0;
302 spin_lock_init(&hinfo->lock);
303
304 hinfo->pde = create_proc_entry(minfo->name, 0,
305 family == AF_INET ? hashlimit_procdir4 :
306 hashlimit_procdir6);
307 if (hinfo->pde == NULL) {
308 vfree(hinfo);
309 return -1;
310 }
311 hinfo->pde->proc_fops = &dl_file_ops;
312 hinfo->pde->data = hinfo;
313
314 setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
315 hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
316 add_timer(&hinfo->timer);
317
318 spin_lock_bh(&hashlimit_lock);
319 hlist_add_head(&hinfo->node, &hashlimit_htables);
320 spin_unlock_bh(&hashlimit_lock);
321
322 return 0;
323}
324
234static bool select_all(const struct xt_hashlimit_htable *ht, 325static bool select_all(const struct xt_hashlimit_htable *ht,
235 const struct dsthash_ent *he) 326 const struct dsthash_ent *he)
236{ 327{
@@ -240,7 +331,7 @@ static bool select_all(const struct xt_hashlimit_htable *ht,
240static bool select_gc(const struct xt_hashlimit_htable *ht, 331static bool select_gc(const struct xt_hashlimit_htable *ht,
241 const struct dsthash_ent *he) 332 const struct dsthash_ent *he)
242{ 333{
243 return jiffies >= he->expires; 334 return time_after_eq(jiffies, he->expires);
244} 335}
245 336
246static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, 337static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
@@ -373,22 +464,66 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
373 dh->rateinfo.prev = now; 464 dh->rateinfo.prev = now;
374} 465}
375 466
467static inline __be32 maskl(__be32 a, unsigned int l)
468{
469 return htonl(ntohl(a) & ~(~(u_int32_t)0 >> l));
470}
471
472#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
473static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
474{
475 switch (p) {
476 case 0:
477 i[0] = i[1] = 0;
478 i[2] = i[3] = 0;
479 break;
480 case 1 ... 31:
481 i[0] = maskl(i[0], p);
482 i[1] = i[2] = i[3] = 0;
483 break;
484 case 32:
485 i[1] = i[2] = i[3] = 0;
486 break;
487 case 33 ... 63:
488 i[1] = maskl(i[1], p - 32);
489 i[2] = i[3] = 0;
490 break;
491 case 64:
492 i[2] = i[3] = 0;
493 break;
494 case 65 ... 95:
495 i[2] = maskl(i[2], p - 64);
496 i[3] = 0;
497 case 96:
498 i[3] = 0;
499 break;
500 case 97 ... 127:
501 i[3] = maskl(i[3], p - 96);
502 break;
503 case 128:
504 break;
505 }
506}
507#endif
508
376static int 509static int
377hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, 510hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
378 struct dsthash_dst *dst, 511 struct dsthash_dst *dst,
379 const struct sk_buff *skb, unsigned int protoff) 512 const struct sk_buff *skb, unsigned int protoff)
380{ 513{
381 __be16 _ports[2], *ports; 514 __be16 _ports[2], *ports;
382 int nexthdr; 515 u8 nexthdr;
383 516
384 memset(dst, 0, sizeof(*dst)); 517 memset(dst, 0, sizeof(*dst));
385 518
386 switch (hinfo->family) { 519 switch (hinfo->family) {
387 case AF_INET: 520 case AF_INET:
388 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) 521 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
389 dst->addr.ip.dst = ip_hdr(skb)->daddr; 522 dst->ip.dst = maskl(ip_hdr(skb)->daddr,
523 hinfo->cfg.dstmask);
390 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) 524 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
391 dst->addr.ip.src = ip_hdr(skb)->saddr; 525 dst->ip.src = maskl(ip_hdr(skb)->saddr,
526 hinfo->cfg.srcmask);
392 527
393 if (!(hinfo->cfg.mode & 528 if (!(hinfo->cfg.mode &
394 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) 529 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
@@ -397,18 +532,23 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
397 break; 532 break;
398#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 533#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
399 case AF_INET6: 534 case AF_INET6:
400 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) 535 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
401 memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr, 536 memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
402 sizeof(dst->addr.ip6.dst)); 537 sizeof(dst->ip6.dst));
403 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) 538 hashlimit_ipv6_mask(dst->ip6.dst, hinfo->cfg.dstmask);
404 memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr, 539 }
405 sizeof(dst->addr.ip6.src)); 540 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) {
541 memcpy(&dst->ip6.src, &ipv6_hdr(skb)->saddr,
542 sizeof(dst->ip6.src));
543 hashlimit_ipv6_mask(dst->ip6.src, hinfo->cfg.srcmask);
544 }
406 545
407 if (!(hinfo->cfg.mode & 546 if (!(hinfo->cfg.mode &
408 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) 547 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
409 return 0; 548 return 0;
410 nexthdr = ipv6_find_hdr(skb, &protoff, -1, NULL); 549 nexthdr = ipv6_hdr(skb)->nexthdr;
411 if (nexthdr < 0) 550 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
551 if ((int)protoff < 0)
412 return -1; 552 return -1;
413 break; 553 break;
414#endif 554#endif
@@ -441,14 +581,10 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
441} 581}
442 582
443static bool 583static bool
444hashlimit_match(const struct sk_buff *skb, 584hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in,
445 const struct net_device *in, 585 const struct net_device *out, const struct xt_match *match,
446 const struct net_device *out, 586 const void *matchinfo, int offset, unsigned int protoff,
447 const struct xt_match *match, 587 bool *hotdrop)
448 const void *matchinfo,
449 int offset,
450 unsigned int protoff,
451 bool *hotdrop)
452{ 588{
453 const struct xt_hashlimit_info *r = 589 const struct xt_hashlimit_info *r =
454 ((const struct xt_hashlimit_info *)matchinfo)->u.master; 590 ((const struct xt_hashlimit_info *)matchinfo)->u.master;
@@ -500,11 +636,62 @@ hotdrop:
500} 636}
501 637
502static bool 638static bool
503hashlimit_checkentry(const char *tablename, 639hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
504 const void *inf, 640 const struct net_device *out, const struct xt_match *match,
505 const struct xt_match *match, 641 const void *matchinfo, int offset, unsigned int protoff,
506 void *matchinfo, 642 bool *hotdrop)
507 unsigned int hook_mask) 643{
644 const struct xt_hashlimit_mtinfo1 *info = matchinfo;
645 struct xt_hashlimit_htable *hinfo = info->hinfo;
646 unsigned long now = jiffies;
647 struct dsthash_ent *dh;
648 struct dsthash_dst dst;
649
650 if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
651 goto hotdrop;
652
653 spin_lock_bh(&hinfo->lock);
654 dh = dsthash_find(hinfo, &dst);
655 if (dh == NULL) {
656 dh = dsthash_alloc_init(hinfo, &dst);
657 if (dh == NULL) {
658 spin_unlock_bh(&hinfo->lock);
659 goto hotdrop;
660 }
661
662 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
663 dh->rateinfo.prev = jiffies;
664 dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
665 hinfo->cfg.burst);
666 dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
667 hinfo->cfg.burst);
668 dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
669 } else {
670 /* update expiration timeout */
671 dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
672 rateinfo_recalc(dh, now);
673 }
674
675 if (dh->rateinfo.credit >= dh->rateinfo.cost) {
676 /* below the limit */
677 dh->rateinfo.credit -= dh->rateinfo.cost;
678 spin_unlock_bh(&hinfo->lock);
679 return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
680 }
681
682 spin_unlock_bh(&hinfo->lock);
683 /* default match is underlimit - so over the limit, we need to invert */
684 return info->cfg.mode & XT_HASHLIMIT_INVERT;
685
686 hotdrop:
687 *hotdrop = true;
688 return false;
689}
690
691static bool
692hashlimit_mt_check_v0(const char *tablename, const void *inf,
693 const struct xt_match *match, void *matchinfo,
694 unsigned int hook_mask)
508{ 695{
509 struct xt_hashlimit_info *r = matchinfo; 696 struct xt_hashlimit_info *r = matchinfo;
510 697
@@ -536,7 +723,7 @@ hashlimit_checkentry(const char *tablename,
536 * create duplicate proc files. -HW */ 723 * create duplicate proc files. -HW */
537 mutex_lock(&hlimit_mutex); 724 mutex_lock(&hlimit_mutex);
538 r->hinfo = htable_find_get(r->name, match->family); 725 r->hinfo = htable_find_get(r->name, match->family);
539 if (!r->hinfo && htable_create(r, match->family) != 0) { 726 if (!r->hinfo && htable_create_v0(r, match->family) != 0) {
540 mutex_unlock(&hlimit_mutex); 727 mutex_unlock(&hlimit_mutex);
541 return false; 728 return false;
542 } 729 }
@@ -547,14 +734,68 @@ hashlimit_checkentry(const char *tablename,
547 return true; 734 return true;
548} 735}
549 736
737static bool
738hashlimit_mt_check(const char *tablename, const void *inf,
739 const struct xt_match *match, void *matchinfo,
740 unsigned int hook_mask)
741{
742 struct xt_hashlimit_mtinfo1 *info = matchinfo;
743
744 /* Check for overflow. */
745 if (info->cfg.burst == 0 ||
746 user2credits(info->cfg.avg * info->cfg.burst) <
747 user2credits(info->cfg.avg)) {
748 printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
749 info->cfg.avg, info->cfg.burst);
750 return false;
751 }
752 if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
753 return false;
754 if (info->name[sizeof(info->name)-1] != '\0')
755 return false;
756 if (match->family == AF_INET) {
757 if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
758 return false;
759 } else {
760 if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
761 return false;
762 }
763
764 /* This is the best we've got: We cannot release and re-grab lock,
765 * since checkentry() is called before x_tables.c grabs xt_mutex.
766 * We also cannot grab the hashtable spinlock, since htable_create will
767 * call vmalloc, and that can sleep. And we cannot just re-search
768 * the list of htable's in htable_create(), since then we would
769 * create duplicate proc files. -HW */
770 mutex_lock(&hlimit_mutex);
771 info->hinfo = htable_find_get(info->name, match->family);
772 if (!info->hinfo && htable_create(info, match->family) != 0) {
773 mutex_unlock(&hlimit_mutex);
774 return false;
775 }
776 mutex_unlock(&hlimit_mutex);
777
778 /* Ugly hack: For SMP, we only want to use one set */
779 info->master = info;
780 return true;
781}
782
550static void 783static void
551hashlimit_destroy(const struct xt_match *match, void *matchinfo) 784hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo)
552{ 785{
553 const struct xt_hashlimit_info *r = matchinfo; 786 const struct xt_hashlimit_info *r = matchinfo;
554 787
555 htable_put(r->hinfo); 788 htable_put(r->hinfo);
556} 789}
557 790
791static void
792hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
793{
794 const struct xt_hashlimit_mtinfo1 *info = matchinfo;
795
796 htable_put(info->hinfo);
797}
798
558#ifdef CONFIG_COMPAT 799#ifdef CONFIG_COMPAT
559struct compat_xt_hashlimit_info { 800struct compat_xt_hashlimit_info {
560 char name[IFNAMSIZ]; 801 char name[IFNAMSIZ];
@@ -563,7 +804,7 @@ struct compat_xt_hashlimit_info {
563 compat_uptr_t master; 804 compat_uptr_t master;
564}; 805};
565 806
566static void compat_from_user(void *dst, void *src) 807static void hashlimit_mt_compat_from_user(void *dst, void *src)
567{ 808{
568 int off = offsetof(struct compat_xt_hashlimit_info, hinfo); 809 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
569 810
@@ -571,7 +812,7 @@ static void compat_from_user(void *dst, void *src)
571 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); 812 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
572} 813}
573 814
574static int compat_to_user(void __user *dst, void *src) 815static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
575{ 816{
576 int off = offsetof(struct compat_xt_hashlimit_info, hinfo); 817 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
577 818
@@ -579,39 +820,63 @@ static int compat_to_user(void __user *dst, void *src)
579} 820}
580#endif 821#endif
581 822
582static struct xt_match xt_hashlimit[] __read_mostly = { 823static struct xt_match hashlimit_mt_reg[] __read_mostly = {
583 { 824 {
584 .name = "hashlimit", 825 .name = "hashlimit",
826 .revision = 0,
585 .family = AF_INET, 827 .family = AF_INET,
586 .match = hashlimit_match, 828 .match = hashlimit_mt_v0,
587 .matchsize = sizeof(struct xt_hashlimit_info), 829 .matchsize = sizeof(struct xt_hashlimit_info),
588#ifdef CONFIG_COMPAT 830#ifdef CONFIG_COMPAT
589 .compatsize = sizeof(struct compat_xt_hashlimit_info), 831 .compatsize = sizeof(struct compat_xt_hashlimit_info),
590 .compat_from_user = compat_from_user, 832 .compat_from_user = hashlimit_mt_compat_from_user,
591 .compat_to_user = compat_to_user, 833 .compat_to_user = hashlimit_mt_compat_to_user,
592#endif 834#endif
593 .checkentry = hashlimit_checkentry, 835 .checkentry = hashlimit_mt_check_v0,
594 .destroy = hashlimit_destroy, 836 .destroy = hashlimit_mt_destroy_v0,
595 .me = THIS_MODULE 837 .me = THIS_MODULE
596 }, 838 },
597 { 839 {
840 .name = "hashlimit",
841 .revision = 1,
842 .family = AF_INET,
843 .match = hashlimit_mt,
844 .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
845 .checkentry = hashlimit_mt_check,
846 .destroy = hashlimit_mt_destroy,
847 .me = THIS_MODULE,
848 },
849#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
850 {
598 .name = "hashlimit", 851 .name = "hashlimit",
599 .family = AF_INET6, 852 .family = AF_INET6,
600 .match = hashlimit_match, 853 .match = hashlimit_mt_v0,
601 .matchsize = sizeof(struct xt_hashlimit_info), 854 .matchsize = sizeof(struct xt_hashlimit_info),
602#ifdef CONFIG_COMPAT 855#ifdef CONFIG_COMPAT
603 .compatsize = sizeof(struct compat_xt_hashlimit_info), 856 .compatsize = sizeof(struct compat_xt_hashlimit_info),
604 .compat_from_user = compat_from_user, 857 .compat_from_user = hashlimit_mt_compat_from_user,
605 .compat_to_user = compat_to_user, 858 .compat_to_user = hashlimit_mt_compat_to_user,
606#endif 859#endif
607 .checkentry = hashlimit_checkentry, 860 .checkentry = hashlimit_mt_check_v0,
608 .destroy = hashlimit_destroy, 861 .destroy = hashlimit_mt_destroy_v0,
609 .me = THIS_MODULE 862 .me = THIS_MODULE
610 }, 863 },
864 {
865 .name = "hashlimit",
866 .revision = 1,
867 .family = AF_INET6,
868 .match = hashlimit_mt,
869 .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
870 .checkentry = hashlimit_mt_check,
871 .destroy = hashlimit_mt_destroy,
872 .me = THIS_MODULE,
873 },
874#endif
611}; 875};
612 876
613/* PROC stuff */ 877/* PROC stuff */
614static void *dl_seq_start(struct seq_file *s, loff_t *pos) 878static void *dl_seq_start(struct seq_file *s, loff_t *pos)
879 __acquires(htable->lock)
615{ 880{
616 struct proc_dir_entry *pde = s->private; 881 struct proc_dir_entry *pde = s->private;
617 struct xt_hashlimit_htable *htable = pde->data; 882 struct xt_hashlimit_htable *htable = pde->data;
@@ -644,6 +909,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
644} 909}
645 910
646static void dl_seq_stop(struct seq_file *s, void *v) 911static void dl_seq_stop(struct seq_file *s, void *v)
912 __releases(htable->lock)
647{ 913{
648 struct proc_dir_entry *pde = s->private; 914 struct proc_dir_entry *pde = s->private;
649 struct xt_hashlimit_htable *htable = pde->data; 915 struct xt_hashlimit_htable *htable = pde->data;
@@ -664,22 +930,24 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
664 return seq_printf(s, "%ld %u.%u.%u.%u:%u->" 930 return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
665 "%u.%u.%u.%u:%u %u %u %u\n", 931 "%u.%u.%u.%u:%u %u %u %u\n",
666 (long)(ent->expires - jiffies)/HZ, 932 (long)(ent->expires - jiffies)/HZ,
667 NIPQUAD(ent->dst.addr.ip.src), 933 NIPQUAD(ent->dst.ip.src),
668 ntohs(ent->dst.src_port), 934 ntohs(ent->dst.src_port),
669 NIPQUAD(ent->dst.addr.ip.dst), 935 NIPQUAD(ent->dst.ip.dst),
670 ntohs(ent->dst.dst_port), 936 ntohs(ent->dst.dst_port),
671 ent->rateinfo.credit, ent->rateinfo.credit_cap, 937 ent->rateinfo.credit, ent->rateinfo.credit_cap,
672 ent->rateinfo.cost); 938 ent->rateinfo.cost);
939#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
673 case AF_INET6: 940 case AF_INET6:
674 return seq_printf(s, "%ld " NIP6_FMT ":%u->" 941 return seq_printf(s, "%ld " NIP6_FMT ":%u->"
675 NIP6_FMT ":%u %u %u %u\n", 942 NIP6_FMT ":%u %u %u %u\n",
676 (long)(ent->expires - jiffies)/HZ, 943 (long)(ent->expires - jiffies)/HZ,
677 NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.src), 944 NIP6(*(struct in6_addr *)&ent->dst.ip6.src),
678 ntohs(ent->dst.src_port), 945 ntohs(ent->dst.src_port),
679 NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.dst), 946 NIP6(*(struct in6_addr *)&ent->dst.ip6.dst),
680 ntohs(ent->dst.dst_port), 947 ntohs(ent->dst.dst_port),
681 ent->rateinfo.credit, ent->rateinfo.credit_cap, 948 ent->rateinfo.credit, ent->rateinfo.credit_cap,
682 ent->rateinfo.cost); 949 ent->rateinfo.cost);
950#endif
683 default: 951 default:
684 BUG(); 952 BUG();
685 return 0; 953 return 0;
@@ -728,11 +996,12 @@ static const struct file_operations dl_file_ops = {
728 .release = seq_release 996 .release = seq_release
729}; 997};
730 998
731static int __init xt_hashlimit_init(void) 999static int __init hashlimit_mt_init(void)
732{ 1000{
733 int err; 1001 int err;
734 1002
735 err = xt_register_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 1003 err = xt_register_matches(hashlimit_mt_reg,
1004 ARRAY_SIZE(hashlimit_mt_reg));
736 if (err < 0) 1005 if (err < 0)
737 goto err1; 1006 goto err1;
738 1007
@@ -750,31 +1019,36 @@ static int __init xt_hashlimit_init(void)
750 "entry\n"); 1019 "entry\n");
751 goto err3; 1020 goto err3;
752 } 1021 }
1022 err = 0;
1023#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
753 hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); 1024 hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
754 if (!hashlimit_procdir6) { 1025 if (!hashlimit_procdir6) {
755 printk(KERN_ERR "xt_hashlimit: unable to create proc dir " 1026 printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
756 "entry\n"); 1027 "entry\n");
757 goto err4; 1028 err = -ENOMEM;
758 } 1029 }
759 return 0; 1030#endif
760err4: 1031 if (!err)
1032 return 0;
761 remove_proc_entry("ipt_hashlimit", init_net.proc_net); 1033 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
762err3: 1034err3:
763 kmem_cache_destroy(hashlimit_cachep); 1035 kmem_cache_destroy(hashlimit_cachep);
764err2: 1036err2:
765 xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 1037 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
766err1: 1038err1:
767 return err; 1039 return err;
768 1040
769} 1041}
770 1042
771static void __exit xt_hashlimit_fini(void) 1043static void __exit hashlimit_mt_exit(void)
772{ 1044{
773 remove_proc_entry("ipt_hashlimit", init_net.proc_net); 1045 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
1046#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
774 remove_proc_entry("ip6t_hashlimit", init_net.proc_net); 1047 remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
1048#endif
775 kmem_cache_destroy(hashlimit_cachep); 1049 kmem_cache_destroy(hashlimit_cachep);
776 xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 1050 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
777} 1051}
778 1052
779module_init(xt_hashlimit_init); 1053module_init(hashlimit_mt_init);
780module_exit(xt_hashlimit_fini); 1054module_exit(hashlimit_mt_exit);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 0a1f4c6bcdef..dada2905d66e 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -18,20 +18,16 @@
18 18
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); 20MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
21MODULE_DESCRIPTION("iptables helper match module"); 21MODULE_DESCRIPTION("Xtables: Related connection matching");
22MODULE_ALIAS("ipt_helper"); 22MODULE_ALIAS("ipt_helper");
23MODULE_ALIAS("ip6t_helper"); 23MODULE_ALIAS("ip6t_helper");
24 24
25 25
26static bool 26static bool
27match(const struct sk_buff *skb, 27helper_mt(const struct sk_buff *skb, const struct net_device *in,
28 const struct net_device *in, 28 const struct net_device *out, const struct xt_match *match,
29 const struct net_device *out, 29 const void *matchinfo, int offset, unsigned int protoff,
30 const struct xt_match *match, 30 bool *hotdrop)
31 const void *matchinfo,
32 int offset,
33 unsigned int protoff,
34 bool *hotdrop)
35{ 31{
36 const struct xt_helper_info *info = matchinfo; 32 const struct xt_helper_info *info = matchinfo;
37 const struct nf_conn *ct; 33 const struct nf_conn *ct;
@@ -56,66 +52,62 @@ match(const struct sk_buff *skb,
56 if (info->name[0] == '\0') 52 if (info->name[0] == '\0')
57 ret = !ret; 53 ret = !ret;
58 else 54 else
59 ret ^= !strncmp(master_help->helper->name, info->name, 55 ret ^= !strncmp(helper->name, info->name,
60 strlen(master_help->helper->name)); 56 strlen(helper->name));
61 return ret; 57 return ret;
62} 58}
63 59
64static bool check(const char *tablename, 60static bool
65 const void *inf, 61helper_mt_check(const char *tablename, const void *inf,
66 const struct xt_match *match, 62 const struct xt_match *match, void *matchinfo,
67 void *matchinfo, 63 unsigned int hook_mask)
68 unsigned int hook_mask)
69{ 64{
70 struct xt_helper_info *info = matchinfo; 65 struct xt_helper_info *info = matchinfo;
71 66
72 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 67 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
73 printk(KERN_WARNING "can't load conntrack support for " 68 printk(KERN_WARNING "can't load conntrack support for "
74 "proto=%d\n", match->family); 69 "proto=%u\n", match->family);
75 return false; 70 return false;
76 } 71 }
77 info->name[29] = '\0'; 72 info->name[29] = '\0';
78 return true; 73 return true;
79} 74}
80 75
81static void 76static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
82destroy(const struct xt_match *match, void *matchinfo)
83{ 77{
84 nf_ct_l3proto_module_put(match->family); 78 nf_ct_l3proto_module_put(match->family);
85} 79}
86 80
87static struct xt_match xt_helper_match[] __read_mostly = { 81static struct xt_match helper_mt_reg[] __read_mostly = {
88 { 82 {
89 .name = "helper", 83 .name = "helper",
90 .family = AF_INET, 84 .family = AF_INET,
91 .checkentry = check, 85 .checkentry = helper_mt_check,
92 .match = match, 86 .match = helper_mt,
93 .destroy = destroy, 87 .destroy = helper_mt_destroy,
94 .matchsize = sizeof(struct xt_helper_info), 88 .matchsize = sizeof(struct xt_helper_info),
95 .me = THIS_MODULE, 89 .me = THIS_MODULE,
96 }, 90 },
97 { 91 {
98 .name = "helper", 92 .name = "helper",
99 .family = AF_INET6, 93 .family = AF_INET6,
100 .checkentry = check, 94 .checkentry = helper_mt_check,
101 .match = match, 95 .match = helper_mt,
102 .destroy = destroy, 96 .destroy = helper_mt_destroy,
103 .matchsize = sizeof(struct xt_helper_info), 97 .matchsize = sizeof(struct xt_helper_info),
104 .me = THIS_MODULE, 98 .me = THIS_MODULE,
105 }, 99 },
106}; 100};
107 101
108static int __init xt_helper_init(void) 102static int __init helper_mt_init(void)
109{ 103{
110 return xt_register_matches(xt_helper_match, 104 return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
111 ARRAY_SIZE(xt_helper_match));
112} 105}
113 106
114static void __exit xt_helper_fini(void) 107static void __exit helper_mt_exit(void)
115{ 108{
116 xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); 109 xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
117} 110}
118 111
119module_init(xt_helper_init); 112module_init(helper_mt_init);
120module_exit(xt_helper_fini); 113module_exit(helper_mt_exit);
121
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
new file mode 100644
index 000000000000..01035fc0e140
--- /dev/null
+++ b/net/netfilter/xt_iprange.c
@@ -0,0 +1,180 @@
1/*
2 * xt_iprange - Netfilter module to match IP address ranges
3 *
4 * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 * (C) CC Computer Consultants GmbH, 2008
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/ipv6.h>
15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ipt_iprange.h>
17
18static bool
19iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
20 const struct net_device *out, const struct xt_match *match,
21 const void *matchinfo, int offset, unsigned int protoff,
22 bool *hotdrop)
23{
24 const struct ipt_iprange_info *info = matchinfo;
25 const struct iphdr *iph = ip_hdr(skb);
26
27 if (info->flags & IPRANGE_SRC) {
28 if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
29 || ntohl(iph->saddr) > ntohl(info->src.max_ip))
30 ^ !!(info->flags & IPRANGE_SRC_INV)) {
31 pr_debug("src IP %u.%u.%u.%u NOT in range %s"
32 "%u.%u.%u.%u-%u.%u.%u.%u\n",
33 NIPQUAD(iph->saddr),
34 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
35 NIPQUAD(info->src.min_ip),
36 NIPQUAD(info->src.max_ip));
37 return false;
38 }
39 }
40 if (info->flags & IPRANGE_DST) {
41 if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
42 || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
43 ^ !!(info->flags & IPRANGE_DST_INV)) {
44 pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
45 "%u.%u.%u.%u-%u.%u.%u.%u\n",
46 NIPQUAD(iph->daddr),
47 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
48 NIPQUAD(info->dst.min_ip),
49 NIPQUAD(info->dst.max_ip));
50 return false;
51 }
52 }
53 return true;
54}
55
56static bool
57iprange_mt4(const struct sk_buff *skb, const struct net_device *in,
58 const struct net_device *out, const struct xt_match *match,
59 const void *matchinfo, int offset, unsigned int protoff,
60 bool *hotdrop)
61{
62 const struct xt_iprange_mtinfo *info = matchinfo;
63 const struct iphdr *iph = ip_hdr(skb);
64 bool m;
65
66 if (info->flags & IPRANGE_SRC) {
67 m = ntohl(iph->saddr) < ntohl(info->src_min.ip);
68 m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
69 m ^= info->flags & IPRANGE_SRC_INV;
70 if (m) {
71 pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
72 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
73 NIPQUAD(iph->saddr),
74 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
75 NIPQUAD(info->src_max.ip),
76 NIPQUAD(info->src_max.ip));
77 return false;
78 }
79 }
80 if (info->flags & IPRANGE_DST) {
81 m = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
82 m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
83 m ^= info->flags & IPRANGE_DST_INV;
84 if (m) {
85 pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
86 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
87 NIPQUAD(iph->daddr),
88 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
89 NIPQUAD(info->dst_min.ip),
90 NIPQUAD(info->dst_max.ip));
91 return false;
92 }
93 }
94 return true;
95}
96
97static inline int
98iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
99{
100 unsigned int i;
101 int r;
102
103 for (i = 0; i < 4; ++i) {
104 r = (__force u32)a->s6_addr32[i] - (__force u32)b->s6_addr32[i];
105 if (r != 0)
106 return r;
107 }
108
109 return 0;
110}
111
112static bool
113iprange_mt6(const struct sk_buff *skb, const struct net_device *in,
114 const struct net_device *out, const struct xt_match *match,
115 const void *matchinfo, int offset, unsigned int protoff,
116 bool *hotdrop)
117{
118 const struct xt_iprange_mtinfo *info = matchinfo;
119 const struct ipv6hdr *iph = ipv6_hdr(skb);
120 bool m;
121
122 if (info->flags & IPRANGE_SRC) {
123 m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
124 m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
125 m ^= info->flags & IPRANGE_SRC_INV;
126 if (m)
127 return false;
128 }
129 if (info->flags & IPRANGE_DST) {
130 m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
131 m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
132 m ^= info->flags & IPRANGE_DST_INV;
133 if (m)
134 return false;
135 }
136 return true;
137}
138
139static struct xt_match iprange_mt_reg[] __read_mostly = {
140 {
141 .name = "iprange",
142 .revision = 0,
143 .family = AF_INET,
144 .match = iprange_mt_v0,
145 .matchsize = sizeof(struct ipt_iprange_info),
146 .me = THIS_MODULE,
147 },
148 {
149 .name = "iprange",
150 .revision = 1,
151 .family = AF_INET6,
152 .match = iprange_mt4,
153 .matchsize = sizeof(struct xt_iprange_mtinfo),
154 .me = THIS_MODULE,
155 },
156 {
157 .name = "iprange",
158 .revision = 1,
159 .family = AF_INET6,
160 .match = iprange_mt6,
161 .matchsize = sizeof(struct xt_iprange_mtinfo),
162 .me = THIS_MODULE,
163 },
164};
165
166static int __init iprange_mt_init(void)
167{
168 return xt_register_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
169}
170
171static void __exit iprange_mt_exit(void)
172{
173 xt_unregister_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
174}
175
176module_init(iprange_mt_init);
177module_exit(iprange_mt_exit);
178MODULE_LICENSE("GPL");
179MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
180MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 3dad173d9735..b8640f972950 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -15,20 +15,16 @@
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16 16
17MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 17MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
18MODULE_DESCRIPTION("IP tables packet length matching module"); 18MODULE_DESCRIPTION("Xtables: Packet length (Layer3,4,5) match");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_ALIAS("ipt_length"); 20MODULE_ALIAS("ipt_length");
21MODULE_ALIAS("ip6t_length"); 21MODULE_ALIAS("ip6t_length");
22 22
23static bool 23static bool
24match(const struct sk_buff *skb, 24length_mt(const struct sk_buff *skb, const struct net_device *in,
25 const struct net_device *in, 25 const struct net_device *out, const struct xt_match *match,
26 const struct net_device *out, 26 const void *matchinfo, int offset, unsigned int protoff,
27 const struct xt_match *match, 27 bool *hotdrop)
28 const void *matchinfo,
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 28{
33 const struct xt_length_info *info = matchinfo; 29 const struct xt_length_info *info = matchinfo;
34 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); 30 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -37,14 +33,10 @@ match(const struct sk_buff *skb,
37} 33}
38 34
39static bool 35static bool
40match6(const struct sk_buff *skb, 36length_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 37 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 38 const void *matchinfo, int offset, unsigned int protoff,
43 const struct xt_match *match, 39 bool *hotdrop)
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 40{
49 const struct xt_length_info *info = matchinfo; 41 const struct xt_length_info *info = matchinfo;
50 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + 42 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
@@ -53,33 +45,32 @@ match6(const struct sk_buff *skb,
53 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 45 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
54} 46}
55 47
56static struct xt_match xt_length_match[] __read_mostly = { 48static struct xt_match length_mt_reg[] __read_mostly = {
57 { 49 {
58 .name = "length", 50 .name = "length",
59 .family = AF_INET, 51 .family = AF_INET,
60 .match = match, 52 .match = length_mt,
61 .matchsize = sizeof(struct xt_length_info), 53 .matchsize = sizeof(struct xt_length_info),
62 .me = THIS_MODULE, 54 .me = THIS_MODULE,
63 }, 55 },
64 { 56 {
65 .name = "length", 57 .name = "length",
66 .family = AF_INET6, 58 .family = AF_INET6,
67 .match = match6, 59 .match = length_mt6,
68 .matchsize = sizeof(struct xt_length_info), 60 .matchsize = sizeof(struct xt_length_info),
69 .me = THIS_MODULE, 61 .me = THIS_MODULE,
70 }, 62 },
71}; 63};
72 64
73static int __init xt_length_init(void) 65static int __init length_mt_init(void)
74{ 66{
75 return xt_register_matches(xt_length_match, 67 return xt_register_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
76 ARRAY_SIZE(xt_length_match));
77} 68}
78 69
79static void __exit xt_length_fini(void) 70static void __exit length_mt_exit(void)
80{ 71{
81 xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); 72 xt_unregister_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
82} 73}
83 74
84module_init(xt_length_init); 75module_init(length_mt_init);
85module_exit(xt_length_fini); 76module_exit(length_mt_exit);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index f263a77e57b7..aad9ab8d2046 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -16,7 +16,7 @@
16 16
17MODULE_LICENSE("GPL"); 17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); 18MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
19MODULE_DESCRIPTION("iptables rate limit match"); 19MODULE_DESCRIPTION("Xtables: rate-limit match");
20MODULE_ALIAS("ipt_limit"); 20MODULE_ALIAS("ipt_limit");
21MODULE_ALIAS("ip6t_limit"); 21MODULE_ALIAS("ip6t_limit");
22 22
@@ -58,14 +58,10 @@ static DEFINE_SPINLOCK(limit_lock);
58#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) 58#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
59 59
60static bool 60static bool
61ipt_limit_match(const struct sk_buff *skb, 61limit_mt(const struct sk_buff *skb, const struct net_device *in,
62 const struct net_device *in, 62 const struct net_device *out, const struct xt_match *match,
63 const struct net_device *out, 63 const void *matchinfo, int offset, unsigned int protoff,
64 const struct xt_match *match, 64 bool *hotdrop)
65 const void *matchinfo,
66 int offset,
67 unsigned int protoff,
68 bool *hotdrop)
69{ 65{
70 struct xt_rateinfo *r = 66 struct xt_rateinfo *r =
71 ((const struct xt_rateinfo *)matchinfo)->master; 67 ((const struct xt_rateinfo *)matchinfo)->master;
@@ -100,11 +96,9 @@ user2credits(u_int32_t user)
100} 96}
101 97
102static bool 98static bool
103ipt_limit_checkentry(const char *tablename, 99limit_mt_check(const char *tablename, const void *inf,
104 const void *inf, 100 const struct xt_match *match, void *matchinfo,
105 const struct xt_match *match, 101 unsigned int hook_mask)
106 void *matchinfo,
107 unsigned int hook_mask)
108{ 102{
109 struct xt_rateinfo *r = matchinfo; 103 struct xt_rateinfo *r = matchinfo;
110 104
@@ -143,7 +137,7 @@ struct compat_xt_rateinfo {
143 137
144/* To keep the full "prev" timestamp, the upper 32 bits are stored in the 138/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
145 * master pointer, which does not need to be preserved. */ 139 * master pointer, which does not need to be preserved. */
146static void compat_from_user(void *dst, void *src) 140static void limit_mt_compat_from_user(void *dst, void *src)
147{ 141{
148 const struct compat_xt_rateinfo *cm = src; 142 const struct compat_xt_rateinfo *cm = src;
149 struct xt_rateinfo m = { 143 struct xt_rateinfo m = {
@@ -157,7 +151,7 @@ static void compat_from_user(void *dst, void *src)
157 memcpy(dst, &m, sizeof(m)); 151 memcpy(dst, &m, sizeof(m));
158} 152}
159 153
160static int compat_to_user(void __user *dst, void *src) 154static int limit_mt_compat_to_user(void __user *dst, void *src)
161{ 155{
162 const struct xt_rateinfo *m = src; 156 const struct xt_rateinfo *m = src;
163 struct compat_xt_rateinfo cm = { 157 struct compat_xt_rateinfo cm = {
@@ -173,39 +167,44 @@ static int compat_to_user(void __user *dst, void *src)
173} 167}
174#endif /* CONFIG_COMPAT */ 168#endif /* CONFIG_COMPAT */
175 169
176static struct xt_match xt_limit_match[] __read_mostly = { 170static struct xt_match limit_mt_reg[] __read_mostly = {
177 { 171 {
178 .name = "limit", 172 .name = "limit",
179 .family = AF_INET, 173 .family = AF_INET,
180 .checkentry = ipt_limit_checkentry, 174 .checkentry = limit_mt_check,
181 .match = ipt_limit_match, 175 .match = limit_mt,
182 .matchsize = sizeof(struct xt_rateinfo), 176 .matchsize = sizeof(struct xt_rateinfo),
183#ifdef CONFIG_COMPAT 177#ifdef CONFIG_COMPAT
184 .compatsize = sizeof(struct compat_xt_rateinfo), 178 .compatsize = sizeof(struct compat_xt_rateinfo),
185 .compat_from_user = compat_from_user, 179 .compat_from_user = limit_mt_compat_from_user,
186 .compat_to_user = compat_to_user, 180 .compat_to_user = limit_mt_compat_to_user,
187#endif 181#endif
188 .me = THIS_MODULE, 182 .me = THIS_MODULE,
189 }, 183 },
190 { 184 {
191 .name = "limit", 185 .name = "limit",
192 .family = AF_INET6, 186 .family = AF_INET6,
193 .checkentry = ipt_limit_checkentry, 187 .checkentry = limit_mt_check,
194 .match = ipt_limit_match, 188 .match = limit_mt,
195 .matchsize = sizeof(struct xt_rateinfo), 189 .matchsize = sizeof(struct xt_rateinfo),
190#ifdef CONFIG_COMPAT
191 .compatsize = sizeof(struct compat_xt_rateinfo),
192 .compat_from_user = limit_mt_compat_from_user,
193 .compat_to_user = limit_mt_compat_to_user,
194#endif
196 .me = THIS_MODULE, 195 .me = THIS_MODULE,
197 }, 196 },
198}; 197};
199 198
200static int __init xt_limit_init(void) 199static int __init limit_mt_init(void)
201{ 200{
202 return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); 201 return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
203} 202}
204 203
205static void __exit xt_limit_fini(void) 204static void __exit limit_mt_exit(void)
206{ 205{
207 xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); 206 xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
208} 207}
209 208
210module_init(xt_limit_init); 209module_init(limit_mt_init);
211module_exit(xt_limit_fini); 210module_exit(limit_mt_exit);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 00490d777a0f..b3e96a0ec176 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -20,19 +20,14 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 22MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
23MODULE_DESCRIPTION("iptables mac matching module"); 23MODULE_DESCRIPTION("Xtables: MAC address match");
24MODULE_ALIAS("ipt_mac"); 24MODULE_ALIAS("ipt_mac");
25MODULE_ALIAS("ip6t_mac"); 25MODULE_ALIAS("ip6t_mac");
26 26
27static bool 27static bool
28match(const struct sk_buff *skb, 28mac_mt(const struct sk_buff *skb, const struct net_device *in,
29 const struct net_device *in, 29 const struct net_device *out, const struct xt_match *match,
30 const struct net_device *out, 30 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
31 const struct xt_match *match,
32 const void *matchinfo,
33 int offset,
34 unsigned int protoff,
35 bool *hotdrop)
36{ 31{
37 const struct xt_mac_info *info = matchinfo; 32 const struct xt_mac_info *info = matchinfo;
38 33
@@ -44,38 +39,38 @@ match(const struct sk_buff *skb,
44 ^ info->invert); 39 ^ info->invert);
45} 40}
46 41
47static struct xt_match xt_mac_match[] __read_mostly = { 42static struct xt_match mac_mt_reg[] __read_mostly = {
48 { 43 {
49 .name = "mac", 44 .name = "mac",
50 .family = AF_INET, 45 .family = AF_INET,
51 .match = match, 46 .match = mac_mt,
52 .matchsize = sizeof(struct xt_mac_info), 47 .matchsize = sizeof(struct xt_mac_info),
53 .hooks = (1 << NF_IP_PRE_ROUTING) | 48 .hooks = (1 << NF_INET_PRE_ROUTING) |
54 (1 << NF_IP_LOCAL_IN) | 49 (1 << NF_INET_LOCAL_IN) |
55 (1 << NF_IP_FORWARD), 50 (1 << NF_INET_FORWARD),
56 .me = THIS_MODULE, 51 .me = THIS_MODULE,
57 }, 52 },
58 { 53 {
59 .name = "mac", 54 .name = "mac",
60 .family = AF_INET6, 55 .family = AF_INET6,
61 .match = match, 56 .match = mac_mt,
62 .matchsize = sizeof(struct xt_mac_info), 57 .matchsize = sizeof(struct xt_mac_info),
63 .hooks = (1 << NF_IP6_PRE_ROUTING) | 58 .hooks = (1 << NF_INET_PRE_ROUTING) |
64 (1 << NF_IP6_LOCAL_IN) | 59 (1 << NF_INET_LOCAL_IN) |
65 (1 << NF_IP6_FORWARD), 60 (1 << NF_INET_FORWARD),
66 .me = THIS_MODULE, 61 .me = THIS_MODULE,
67 }, 62 },
68}; 63};
69 64
70static int __init xt_mac_init(void) 65static int __init mac_mt_init(void)
71{ 66{
72 return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); 67 return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
73} 68}
74 69
75static void __exit xt_mac_fini(void) 70static void __exit mac_mt_exit(void)
76{ 71{
77 xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); 72 xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
78} 73}
79 74
80module_init(xt_mac_init); 75module_init(mac_mt_init);
81module_exit(xt_mac_fini); 76module_exit(mac_mt_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index c02a7f8f3925..9f78f6120fbd 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -1,10 +1,13 @@
1/* Kernel module to match NFMARK values. */ 1/*
2 2 * xt_mark - Netfilter module to match NFMARK value
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> 3 *
4 * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
5 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * Jan Engelhardt <jengelh@computergmbh.de>
4 * 7 *
5 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
8 */ 11 */
9 12
10#include <linux/module.h> 13#include <linux/module.h>
@@ -15,19 +18,15 @@
15 18
16MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 20MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
18MODULE_DESCRIPTION("iptables mark matching module"); 21MODULE_DESCRIPTION("Xtables: packet mark match");
19MODULE_ALIAS("ipt_mark"); 22MODULE_ALIAS("ipt_mark");
20MODULE_ALIAS("ip6t_mark"); 23MODULE_ALIAS("ip6t_mark");
21 24
22static bool 25static bool
23match(const struct sk_buff *skb, 26mark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 27 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 28 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 29 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 30{
32 const struct xt_mark_info *info = matchinfo; 31 const struct xt_mark_info *info = matchinfo;
33 32
@@ -35,11 +34,19 @@ match(const struct sk_buff *skb,
35} 34}
36 35
37static bool 36static bool
38checkentry(const char *tablename, 37mark_mt(const struct sk_buff *skb, const struct net_device *in,
39 const void *entry, 38 const struct net_device *out, const struct xt_match *match,
40 const struct xt_match *match, 39 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
41 void *matchinfo, 40{
42 unsigned int hook_mask) 41 const struct xt_mark_mtinfo1 *info = matchinfo;
42
43 return ((skb->mark & info->mask) == info->mark) ^ info->invert;
44}
45
46static bool
47mark_mt_check_v0(const char *tablename, const void *entry,
48 const struct xt_match *match, void *matchinfo,
49 unsigned int hook_mask)
43{ 50{
44 const struct xt_mark_info *minfo = matchinfo; 51 const struct xt_mark_info *minfo = matchinfo;
45 52
@@ -58,7 +65,7 @@ struct compat_xt_mark_info {
58 u_int16_t __pad2; 65 u_int16_t __pad2;
59}; 66};
60 67
61static void compat_from_user(void *dst, void *src) 68static void mark_mt_compat_from_user_v0(void *dst, void *src)
62{ 69{
63 const struct compat_xt_mark_info *cm = src; 70 const struct compat_xt_mark_info *cm = src;
64 struct xt_mark_info m = { 71 struct xt_mark_info m = {
@@ -69,7 +76,7 @@ static void compat_from_user(void *dst, void *src)
69 memcpy(dst, &m, sizeof(m)); 76 memcpy(dst, &m, sizeof(m));
70} 77}
71 78
72static int compat_to_user(void __user *dst, void *src) 79static int mark_mt_compat_to_user_v0(void __user *dst, void *src)
73{ 80{
74 const struct xt_mark_info *m = src; 81 const struct xt_mark_info *m = src;
75 struct compat_xt_mark_info cm = { 82 struct compat_xt_mark_info cm = {
@@ -81,39 +88,62 @@ static int compat_to_user(void __user *dst, void *src)
81} 88}
82#endif /* CONFIG_COMPAT */ 89#endif /* CONFIG_COMPAT */
83 90
84static struct xt_match xt_mark_match[] __read_mostly = { 91static struct xt_match mark_mt_reg[] __read_mostly = {
85 { 92 {
86 .name = "mark", 93 .name = "mark",
94 .revision = 0,
87 .family = AF_INET, 95 .family = AF_INET,
88 .checkentry = checkentry, 96 .checkentry = mark_mt_check_v0,
89 .match = match, 97 .match = mark_mt_v0,
90 .matchsize = sizeof(struct xt_mark_info), 98 .matchsize = sizeof(struct xt_mark_info),
91#ifdef CONFIG_COMPAT 99#ifdef CONFIG_COMPAT
92 .compatsize = sizeof(struct compat_xt_mark_info), 100 .compatsize = sizeof(struct compat_xt_mark_info),
93 .compat_from_user = compat_from_user, 101 .compat_from_user = mark_mt_compat_from_user_v0,
94 .compat_to_user = compat_to_user, 102 .compat_to_user = mark_mt_compat_to_user_v0,
95#endif 103#endif
96 .me = THIS_MODULE, 104 .me = THIS_MODULE,
97 }, 105 },
98 { 106 {
99 .name = "mark", 107 .name = "mark",
108 .revision = 0,
100 .family = AF_INET6, 109 .family = AF_INET6,
101 .checkentry = checkentry, 110 .checkentry = mark_mt_check_v0,
102 .match = match, 111 .match = mark_mt_v0,
103 .matchsize = sizeof(struct xt_mark_info), 112 .matchsize = sizeof(struct xt_mark_info),
113#ifdef CONFIG_COMPAT
114 .compatsize = sizeof(struct compat_xt_mark_info),
115 .compat_from_user = mark_mt_compat_from_user_v0,
116 .compat_to_user = mark_mt_compat_to_user_v0,
117#endif
104 .me = THIS_MODULE, 118 .me = THIS_MODULE,
105 }, 119 },
120 {
121 .name = "mark",
122 .revision = 1,
123 .family = AF_INET,
124 .match = mark_mt,
125 .matchsize = sizeof(struct xt_mark_mtinfo1),
126 .me = THIS_MODULE,
127 },
128 {
129 .name = "mark",
130 .revision = 1,
131 .family = AF_INET6,
132 .match = mark_mt,
133 .matchsize = sizeof(struct xt_mark_mtinfo1),
134 .me = THIS_MODULE,
135 },
106}; 136};
107 137
108static int __init xt_mark_init(void) 138static int __init mark_mt_init(void)
109{ 139{
110 return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); 140 return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
111} 141}
112 142
113static void __exit xt_mark_fini(void) 143static void __exit mark_mt_exit(void)
114{ 144{
115 xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); 145 xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
116} 146}
117 147
118module_init(xt_mark_init); 148module_init(mark_mt_init);
119module_exit(xt_mark_fini); 149module_exit(mark_mt_exit);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index e8ae10284acd..31daa8192422 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -22,7 +22,7 @@
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 24MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
25MODULE_DESCRIPTION("x_tables multiple port match module"); 25MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP");
26MODULE_ALIAS("ipt_multiport"); 26MODULE_ALIAS("ipt_multiport");
27MODULE_ALIAS("ip6t_multiport"); 27MODULE_ALIAS("ip6t_multiport");
28 28
@@ -34,8 +34,8 @@ MODULE_ALIAS("ip6t_multiport");
34 34
35/* Returns 1 if the port is matched by the test, 0 otherwise. */ 35/* Returns 1 if the port is matched by the test, 0 otherwise. */
36static inline bool 36static inline bool
37ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags, 37ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
38 u_int8_t count, u_int16_t src, u_int16_t dst) 38 u_int8_t count, u_int16_t src, u_int16_t dst)
39{ 39{
40 unsigned int i; 40 unsigned int i;
41 for (i = 0; i < count; i++) { 41 for (i = 0; i < count; i++) {
@@ -95,14 +95,10 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
95} 95}
96 96
97static bool 97static bool
98match(const struct sk_buff *skb, 98multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
99 const struct net_device *in, 99 const struct net_device *out, const struct xt_match *match,
100 const struct net_device *out, 100 const void *matchinfo, int offset, unsigned int protoff,
101 const struct xt_match *match, 101 bool *hotdrop)
102 const void *matchinfo,
103 int offset,
104 unsigned int protoff,
105 bool *hotdrop)
106{ 102{
107 __be16 _ports[2], *pptr; 103 __be16 _ports[2], *pptr;
108 const struct xt_multiport *multiinfo = matchinfo; 104 const struct xt_multiport *multiinfo = matchinfo;
@@ -120,20 +116,15 @@ match(const struct sk_buff *skb,
120 return false; 116 return false;
121 } 117 }
122 118
123 return ports_match(multiinfo->ports, 119 return ports_match_v0(multiinfo->ports, multiinfo->flags,
124 multiinfo->flags, multiinfo->count, 120 multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1]));
125 ntohs(pptr[0]), ntohs(pptr[1]));
126} 121}
127 122
128static bool 123static bool
129match_v1(const struct sk_buff *skb, 124multiport_mt(const struct sk_buff *skb, const struct net_device *in,
130 const struct net_device *in, 125 const struct net_device *out, const struct xt_match *match,
131 const struct net_device *out, 126 const void *matchinfo, int offset, unsigned int protoff,
132 const struct xt_match *match, 127 bool *hotdrop)
133 const void *matchinfo,
134 int offset,
135 unsigned int protoff,
136 bool *hotdrop)
137{ 128{
138 __be16 _ports[2], *pptr; 129 __be16 _ports[2], *pptr;
139 const struct xt_multiport_v1 *multiinfo = matchinfo; 130 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -173,11 +164,9 @@ check(u_int16_t proto,
173 164
174/* Called when user tries to insert an entry of this type. */ 165/* Called when user tries to insert an entry of this type. */
175static bool 166static bool
176checkentry(const char *tablename, 167multiport_mt_check_v0(const char *tablename, const void *info,
177 const void *info, 168 const struct xt_match *match, void *matchinfo,
178 const struct xt_match *match, 169 unsigned int hook_mask)
179 void *matchinfo,
180 unsigned int hook_mask)
181{ 170{
182 const struct ipt_ip *ip = info; 171 const struct ipt_ip *ip = info;
183 const struct xt_multiport *multiinfo = matchinfo; 172 const struct xt_multiport *multiinfo = matchinfo;
@@ -187,11 +176,9 @@ checkentry(const char *tablename,
187} 176}
188 177
189static bool 178static bool
190checkentry_v1(const char *tablename, 179multiport_mt_check(const char *tablename, const void *info,
191 const void *info, 180 const struct xt_match *match, void *matchinfo,
192 const struct xt_match *match, 181 unsigned int hook_mask)
193 void *matchinfo,
194 unsigned int hook_mask)
195{ 182{
196 const struct ipt_ip *ip = info; 183 const struct ipt_ip *ip = info;
197 const struct xt_multiport_v1 *multiinfo = matchinfo; 184 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -201,11 +188,9 @@ checkentry_v1(const char *tablename,
201} 188}
202 189
203static bool 190static bool
204checkentry6(const char *tablename, 191multiport_mt6_check_v0(const char *tablename, const void *info,
205 const void *info, 192 const struct xt_match *match, void *matchinfo,
206 const struct xt_match *match, 193 unsigned int hook_mask)
207 void *matchinfo,
208 unsigned int hook_mask)
209{ 194{
210 const struct ip6t_ip6 *ip = info; 195 const struct ip6t_ip6 *ip = info;
211 const struct xt_multiport *multiinfo = matchinfo; 196 const struct xt_multiport *multiinfo = matchinfo;
@@ -215,11 +200,9 @@ checkentry6(const char *tablename,
215} 200}
216 201
217static bool 202static bool
218checkentry6_v1(const char *tablename, 203multiport_mt6_check(const char *tablename, const void *info,
219 const void *info, 204 const struct xt_match *match, void *matchinfo,
220 const struct xt_match *match, 205 unsigned int hook_mask)
221 void *matchinfo,
222 unsigned int hook_mask)
223{ 206{
224 const struct ip6t_ip6 *ip = info; 207 const struct ip6t_ip6 *ip = info;
225 const struct xt_multiport_v1 *multiinfo = matchinfo; 208 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -228,13 +211,13 @@ checkentry6_v1(const char *tablename,
228 multiinfo->count); 211 multiinfo->count);
229} 212}
230 213
231static struct xt_match xt_multiport_match[] __read_mostly = { 214static struct xt_match multiport_mt_reg[] __read_mostly = {
232 { 215 {
233 .name = "multiport", 216 .name = "multiport",
234 .family = AF_INET, 217 .family = AF_INET,
235 .revision = 0, 218 .revision = 0,
236 .checkentry = checkentry, 219 .checkentry = multiport_mt_check_v0,
237 .match = match, 220 .match = multiport_mt_v0,
238 .matchsize = sizeof(struct xt_multiport), 221 .matchsize = sizeof(struct xt_multiport),
239 .me = THIS_MODULE, 222 .me = THIS_MODULE,
240 }, 223 },
@@ -242,8 +225,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
242 .name = "multiport", 225 .name = "multiport",
243 .family = AF_INET, 226 .family = AF_INET,
244 .revision = 1, 227 .revision = 1,
245 .checkentry = checkentry_v1, 228 .checkentry = multiport_mt_check,
246 .match = match_v1, 229 .match = multiport_mt,
247 .matchsize = sizeof(struct xt_multiport_v1), 230 .matchsize = sizeof(struct xt_multiport_v1),
248 .me = THIS_MODULE, 231 .me = THIS_MODULE,
249 }, 232 },
@@ -251,8 +234,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
251 .name = "multiport", 234 .name = "multiport",
252 .family = AF_INET6, 235 .family = AF_INET6,
253 .revision = 0, 236 .revision = 0,
254 .checkentry = checkentry6, 237 .checkentry = multiport_mt6_check_v0,
255 .match = match, 238 .match = multiport_mt_v0,
256 .matchsize = sizeof(struct xt_multiport), 239 .matchsize = sizeof(struct xt_multiport),
257 .me = THIS_MODULE, 240 .me = THIS_MODULE,
258 }, 241 },
@@ -260,24 +243,23 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
260 .name = "multiport", 243 .name = "multiport",
261 .family = AF_INET6, 244 .family = AF_INET6,
262 .revision = 1, 245 .revision = 1,
263 .checkentry = checkentry6_v1, 246 .checkentry = multiport_mt6_check,
264 .match = match_v1, 247 .match = multiport_mt,
265 .matchsize = sizeof(struct xt_multiport_v1), 248 .matchsize = sizeof(struct xt_multiport_v1),
266 .me = THIS_MODULE, 249 .me = THIS_MODULE,
267 }, 250 },
268}; 251};
269 252
270static int __init xt_multiport_init(void) 253static int __init multiport_mt_init(void)
271{ 254{
272 return xt_register_matches(xt_multiport_match, 255 return xt_register_matches(multiport_mt_reg,
273 ARRAY_SIZE(xt_multiport_match)); 256 ARRAY_SIZE(multiport_mt_reg));
274} 257}
275 258
276static void __exit xt_multiport_fini(void) 259static void __exit multiport_mt_exit(void)
277{ 260{
278 xt_unregister_matches(xt_multiport_match, 261 xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg));
279 ARRAY_SIZE(xt_multiport_match));
280} 262}
281 263
282module_init(xt_multiport_init); 264module_init(multiport_mt_init);
283module_exit(xt_multiport_fini); 265module_exit(multiport_mt_exit);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
new file mode 100644
index 000000000000..9059c16144c3
--- /dev/null
+++ b/net/netfilter/xt_owner.c
@@ -0,0 +1,213 @@
1/*
2 * Kernel module to match various things tied to sockets associated with
3 * locally generated outgoing packets.
4 *
5 * (C) 2000 Marc Boucher <marc@mbsi.ca>
6 *
7 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
8 * <jengelh@computergmbh.de>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/file.h>
17#include <net/sock.h>
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/xt_owner.h>
20#include <linux/netfilter_ipv4/ipt_owner.h>
21#include <linux/netfilter_ipv6/ip6t_owner.h>
22
23static bool
24owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
25 const struct net_device *out, const struct xt_match *match,
26 const void *matchinfo, int offset, unsigned int protoff,
27 bool *hotdrop)
28{
29 const struct ipt_owner_info *info = matchinfo;
30 const struct file *filp;
31
32 if (skb->sk == NULL || skb->sk->sk_socket == NULL)
33 return false;
34
35 filp = skb->sk->sk_socket->file;
36 if (filp == NULL)
37 return false;
38
39 if (info->match & IPT_OWNER_UID)
40 if ((filp->f_uid != info->uid) ^
41 !!(info->invert & IPT_OWNER_UID))
42 return false;
43
44 if (info->match & IPT_OWNER_GID)
45 if ((filp->f_gid != info->gid) ^
46 !!(info->invert & IPT_OWNER_GID))
47 return false;
48
49 return true;
50}
51
52static bool
53owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
54 const struct net_device *out, const struct xt_match *match,
55 const void *matchinfo, int offset, unsigned int protoff,
56 bool *hotdrop)
57{
58 const struct ip6t_owner_info *info = matchinfo;
59 const struct file *filp;
60
61 if (skb->sk == NULL || skb->sk->sk_socket == NULL)
62 return false;
63
64 filp = skb->sk->sk_socket->file;
65 if (filp == NULL)
66 return false;
67
68 if (info->match & IP6T_OWNER_UID)
69 if ((filp->f_uid != info->uid) ^
70 !!(info->invert & IP6T_OWNER_UID))
71 return false;
72
73 if (info->match & IP6T_OWNER_GID)
74 if ((filp->f_gid != info->gid) ^
75 !!(info->invert & IP6T_OWNER_GID))
76 return false;
77
78 return true;
79}
80
81static bool
82owner_mt(const struct sk_buff *skb, const struct net_device *in,
83 const struct net_device *out, const struct xt_match *match,
84 const void *matchinfo, int offset, unsigned int protoff,
85 bool *hotdrop)
86{
87 const struct xt_owner_match_info *info = matchinfo;
88 const struct file *filp;
89
90 if (skb->sk == NULL || skb->sk->sk_socket == NULL)
91 return (info->match ^ info->invert) == 0;
92 else if (info->match & info->invert & XT_OWNER_SOCKET)
93 /*
94 * Socket exists but user wanted ! --socket-exists.
95 * (Single ampersands intended.)
96 */
97 return false;
98
99 filp = skb->sk->sk_socket->file;
100 if (filp == NULL)
101 return ((info->match ^ info->invert) &
102 (XT_OWNER_UID | XT_OWNER_GID)) == 0;
103
104 if (info->match & XT_OWNER_UID)
105 if ((filp->f_uid >= info->uid_min &&
106 filp->f_uid <= info->uid_max) ^
107 !(info->invert & XT_OWNER_UID))
108 return false;
109
110 if (info->match & XT_OWNER_GID)
111 if ((filp->f_gid >= info->gid_min &&
112 filp->f_gid <= info->gid_max) ^
113 !(info->invert & XT_OWNER_GID))
114 return false;
115
116 return true;
117}
118
119static bool
120owner_mt_check_v0(const char *tablename, const void *ip,
121 const struct xt_match *match, void *matchinfo,
122 unsigned int hook_mask)
123{
124 const struct ipt_owner_info *info = matchinfo;
125
126 if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
127 printk(KERN_WARNING KBUILD_MODNAME
128 ": PID, SID and command matching is not "
129 "supported anymore\n");
130 return false;
131 }
132
133 return true;
134}
135
136static bool
137owner_mt6_check_v0(const char *tablename, const void *ip,
138 const struct xt_match *match, void *matchinfo,
139 unsigned int hook_mask)
140{
141 const struct ip6t_owner_info *info = matchinfo;
142
143 if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
144 printk(KERN_WARNING KBUILD_MODNAME
145 ": PID and SID matching is not supported anymore\n");
146 return false;
147 }
148
149 return true;
150}
151
152static struct xt_match owner_mt_reg[] __read_mostly = {
153 {
154 .name = "owner",
155 .revision = 0,
156 .family = AF_INET,
157 .match = owner_mt_v0,
158 .matchsize = sizeof(struct ipt_owner_info),
159 .checkentry = owner_mt_check_v0,
160 .hooks = (1 << NF_INET_LOCAL_OUT) |
161 (1 << NF_INET_POST_ROUTING),
162 .me = THIS_MODULE,
163 },
164 {
165 .name = "owner",
166 .revision = 0,
167 .family = AF_INET6,
168 .match = owner_mt6_v0,
169 .matchsize = sizeof(struct ip6t_owner_info),
170 .checkentry = owner_mt6_check_v0,
171 .hooks = (1 << NF_INET_LOCAL_OUT) |
172 (1 << NF_INET_POST_ROUTING),
173 .me = THIS_MODULE,
174 },
175 {
176 .name = "owner",
177 .revision = 1,
178 .family = AF_INET,
179 .match = owner_mt,
180 .matchsize = sizeof(struct xt_owner_match_info),
181 .hooks = (1 << NF_INET_LOCAL_OUT) |
182 (1 << NF_INET_POST_ROUTING),
183 .me = THIS_MODULE,
184 },
185 {
186 .name = "owner",
187 .revision = 1,
188 .family = AF_INET6,
189 .match = owner_mt,
190 .matchsize = sizeof(struct xt_owner_match_info),
191 .hooks = (1 << NF_INET_LOCAL_OUT) |
192 (1 << NF_INET_POST_ROUTING),
193 .me = THIS_MODULE,
194 },
195};
196
197static int __init owner_mt_init(void)
198{
199 return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
200}
201
202static void __exit owner_mt_exit(void)
203{
204 xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
205}
206
207module_init(owner_mt_init);
208module_exit(owner_mt_exit);
209MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
210MODULE_DESCRIPTION("Xtables: socket owner matching");
211MODULE_LICENSE("GPL");
212MODULE_ALIAS("ipt_owner");
213MODULE_ALIAS("ip6t_owner");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index a4bab043a6d1..4ec1094bda92 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -16,19 +16,15 @@
16 16
17MODULE_LICENSE("GPL"); 17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 18MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
19MODULE_DESCRIPTION("iptables bridge physical device match module"); 19MODULE_DESCRIPTION("Xtables: Bridge physical device match");
20MODULE_ALIAS("ipt_physdev"); 20MODULE_ALIAS("ipt_physdev");
21MODULE_ALIAS("ip6t_physdev"); 21MODULE_ALIAS("ip6t_physdev");
22 22
23static bool 23static bool
24match(const struct sk_buff *skb, 24physdev_mt(const struct sk_buff *skb, const struct net_device *in,
25 const struct net_device *in, 25 const struct net_device *out, const struct xt_match *match,
26 const struct net_device *out, 26 const void *matchinfo, int offset, unsigned int protoff,
27 const struct xt_match *match, 27 bool *hotdrop)
28 const void *matchinfo,
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 28{
33 int i; 29 int i;
34 static const char nulldevname[IFNAMSIZ]; 30 static const char nulldevname[IFNAMSIZ];
@@ -99,11 +95,9 @@ match_outdev:
99} 95}
100 96
101static bool 97static bool
102checkentry(const char *tablename, 98physdev_mt_check(const char *tablename, const void *ip,
103 const void *ip, 99 const struct xt_match *match, void *matchinfo,
104 const struct xt_match *match, 100 unsigned int hook_mask)
105 void *matchinfo,
106 unsigned int hook_mask)
107{ 101{
108 const struct xt_physdev_info *info = matchinfo; 102 const struct xt_physdev_info *info = matchinfo;
109 103
@@ -113,46 +107,45 @@ checkentry(const char *tablename,
113 if (info->bitmask & XT_PHYSDEV_OP_OUT && 107 if (info->bitmask & XT_PHYSDEV_OP_OUT &&
114 (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || 108 (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
115 info->invert & XT_PHYSDEV_OP_BRIDGED) && 109 info->invert & XT_PHYSDEV_OP_BRIDGED) &&
116 hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | 110 hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
117 (1 << NF_IP_POST_ROUTING))) { 111 (1 << NF_INET_POST_ROUTING))) {
118 printk(KERN_WARNING "physdev match: using --physdev-out in the " 112 printk(KERN_WARNING "physdev match: using --physdev-out in the "
119 "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " 113 "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
120 "traffic is not supported anymore.\n"); 114 "traffic is not supported anymore.\n");
121 if (hook_mask & (1 << NF_IP_LOCAL_OUT)) 115 if (hook_mask & (1 << NF_INET_LOCAL_OUT))
122 return false; 116 return false;
123 } 117 }
124 return true; 118 return true;
125} 119}
126 120
127static struct xt_match xt_physdev_match[] __read_mostly = { 121static struct xt_match physdev_mt_reg[] __read_mostly = {
128 { 122 {
129 .name = "physdev", 123 .name = "physdev",
130 .family = AF_INET, 124 .family = AF_INET,
131 .checkentry = checkentry, 125 .checkentry = physdev_mt_check,
132 .match = match, 126 .match = physdev_mt,
133 .matchsize = sizeof(struct xt_physdev_info), 127 .matchsize = sizeof(struct xt_physdev_info),
134 .me = THIS_MODULE, 128 .me = THIS_MODULE,
135 }, 129 },
136 { 130 {
137 .name = "physdev", 131 .name = "physdev",
138 .family = AF_INET6, 132 .family = AF_INET6,
139 .checkentry = checkentry, 133 .checkentry = physdev_mt_check,
140 .match = match, 134 .match = physdev_mt,
141 .matchsize = sizeof(struct xt_physdev_info), 135 .matchsize = sizeof(struct xt_physdev_info),
142 .me = THIS_MODULE, 136 .me = THIS_MODULE,
143 }, 137 },
144}; 138};
145 139
146static int __init xt_physdev_init(void) 140static int __init physdev_mt_init(void)
147{ 141{
148 return xt_register_matches(xt_physdev_match, 142 return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
149 ARRAY_SIZE(xt_physdev_match));
150} 143}
151 144
152static void __exit xt_physdev_fini(void) 145static void __exit physdev_mt_exit(void)
153{ 146{
154 xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match)); 147 xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
155} 148}
156 149
157module_init(xt_physdev_init); 150module_init(physdev_mt_init);
158module_exit(xt_physdev_fini); 151module_exit(physdev_mt_exit);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index a52925f12f35..7936f7e23254 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -11,65 +11,66 @@
11#include <linux/if_packet.h> 11#include <linux/if_packet.h>
12#include <linux/in.h> 12#include <linux/in.h>
13#include <linux/ip.h> 13#include <linux/ip.h>
14#include <linux/ipv6.h>
14 15
15#include <linux/netfilter/xt_pkttype.h> 16#include <linux/netfilter/xt_pkttype.h>
16#include <linux/netfilter/x_tables.h> 17#include <linux/netfilter/x_tables.h>
17 18
18MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>"); 20MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
20MODULE_DESCRIPTION("IP tables match to match on linklayer packet type"); 21MODULE_DESCRIPTION("Xtables: link layer packet type match");
21MODULE_ALIAS("ipt_pkttype"); 22MODULE_ALIAS("ipt_pkttype");
22MODULE_ALIAS("ip6t_pkttype"); 23MODULE_ALIAS("ip6t_pkttype");
23 24
24static bool match(const struct sk_buff *skb, 25static bool
25 const struct net_device *in, 26pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
26 const struct net_device *out, 27 const struct net_device *out, const struct xt_match *match,
27 const struct xt_match *match, 28 const void *matchinfo, int offset, unsigned int protoff,
28 const void *matchinfo, 29 bool *hotdrop)
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 30{
33 u_int8_t type;
34 const struct xt_pkttype_info *info = matchinfo; 31 const struct xt_pkttype_info *info = matchinfo;
32 u_int8_t type;
35 33
36 if (skb->pkt_type == PACKET_LOOPBACK) 34 if (skb->pkt_type != PACKET_LOOPBACK)
37 type = MULTICAST(ip_hdr(skb)->daddr)
38 ? PACKET_MULTICAST
39 : PACKET_BROADCAST;
40 else
41 type = skb->pkt_type; 35 type = skb->pkt_type;
36 else if (match->family == AF_INET &&
37 ipv4_is_multicast(ip_hdr(skb)->daddr))
38 type = PACKET_MULTICAST;
39 else if (match->family == AF_INET6 &&
40 ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
41 type = PACKET_MULTICAST;
42 else
43 type = PACKET_BROADCAST;
42 44
43 return (type == info->pkttype) ^ info->invert; 45 return (type == info->pkttype) ^ info->invert;
44} 46}
45 47
46static struct xt_match xt_pkttype_match[] __read_mostly = { 48static struct xt_match pkttype_mt_reg[] __read_mostly = {
47 { 49 {
48 .name = "pkttype", 50 .name = "pkttype",
49 .family = AF_INET, 51 .family = AF_INET,
50 .match = match, 52 .match = pkttype_mt,
51 .matchsize = sizeof(struct xt_pkttype_info), 53 .matchsize = sizeof(struct xt_pkttype_info),
52 .me = THIS_MODULE, 54 .me = THIS_MODULE,
53 }, 55 },
54 { 56 {
55 .name = "pkttype", 57 .name = "pkttype",
56 .family = AF_INET6, 58 .family = AF_INET6,
57 .match = match, 59 .match = pkttype_mt,
58 .matchsize = sizeof(struct xt_pkttype_info), 60 .matchsize = sizeof(struct xt_pkttype_info),
59 .me = THIS_MODULE, 61 .me = THIS_MODULE,
60 }, 62 },
61}; 63};
62 64
63static int __init xt_pkttype_init(void) 65static int __init pkttype_mt_init(void)
64{ 66{
65 return xt_register_matches(xt_pkttype_match, 67 return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
66 ARRAY_SIZE(xt_pkttype_match));
67} 68}
68 69
69static void __exit xt_pkttype_fini(void) 70static void __exit pkttype_mt_exit(void)
70{ 71{
71 xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match)); 72 xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
72} 73}
73 74
74module_init(xt_pkttype_init); 75module_init(pkttype_mt_init);
75module_exit(xt_pkttype_fini); 76module_exit(pkttype_mt_exit);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 6d6d3b7fcbb5..9e918add2282 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -13,37 +13,38 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <net/xfrm.h> 14#include <net/xfrm.h>
15 15
16#include <linux/netfilter.h>
16#include <linux/netfilter/xt_policy.h> 17#include <linux/netfilter/xt_policy.h>
17#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
18 19
19MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 20MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
20MODULE_DESCRIPTION("Xtables IPsec policy matching module"); 21MODULE_DESCRIPTION("Xtables: IPsec policy match");
21MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
22 23
23static inline bool 24static inline bool
24xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, 25xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
25 const union xt_policy_addr *a2, unsigned short family) 26 const union nf_inet_addr *a2, unsigned short family)
26{ 27{
27 switch (family) { 28 switch (family) {
28 case AF_INET: 29 case AF_INET:
29 return !((a1->a4.s_addr ^ a2->a4.s_addr) & m->a4.s_addr); 30 return ((a1->ip ^ a2->ip) & m->ip) == 0;
30 case AF_INET6: 31 case AF_INET6:
31 return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6); 32 return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
32 } 33 }
33 return false; 34 return false;
34} 35}
35 36
36static inline bool 37static bool
37match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e, 38match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
38 unsigned short family) 39 unsigned short family)
39{ 40{
40#define MATCH_ADDR(x,y,z) (!e->match.x || \ 41#define MATCH_ADDR(x,y,z) (!e->match.x || \
41 (xt_addr_cmp(&e->x, &e->y, z, family) \ 42 (xt_addr_cmp(&e->x, &e->y, (const union nf_inet_addr *)(z), family) \
42 ^ e->invert.x)) 43 ^ e->invert.x))
43#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) 44#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
44 45
45 return MATCH_ADDR(saddr, smask, (union xt_policy_addr *)&x->props.saddr) && 46 return MATCH_ADDR(saddr, smask, &x->props.saddr) &&
46 MATCH_ADDR(daddr, dmask, (union xt_policy_addr *)&x->id.daddr) && 47 MATCH_ADDR(daddr, dmask, &x->id.daddr) &&
47 MATCH(proto, x->id.proto) && 48 MATCH(proto, x->id.proto) &&
48 MATCH(mode, x->props.mode) && 49 MATCH(mode, x->props.mode) &&
49 MATCH(spi, x->id.spi) && 50 MATCH(spi, x->id.spi) &&
@@ -108,14 +109,11 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
108 return strict ? i == info->len : 0; 109 return strict ? i == info->len : 0;
109} 110}
110 111
111static bool match(const struct sk_buff *skb, 112static bool
112 const struct net_device *in, 113policy_mt(const struct sk_buff *skb, const struct net_device *in,
113 const struct net_device *out, 114 const struct net_device *out, const struct xt_match *match,
114 const struct xt_match *match, 115 const void *matchinfo, int offset, unsigned int protoff,
115 const void *matchinfo, 116 bool *hotdrop)
116 int offset,
117 unsigned int protoff,
118 bool *hotdrop)
119{ 117{
120 const struct xt_policy_info *info = matchinfo; 118 const struct xt_policy_info *info = matchinfo;
121 int ret; 119 int ret;
@@ -133,9 +131,10 @@ static bool match(const struct sk_buff *skb,
133 return ret; 131 return ret;
134} 132}
135 133
136static bool checkentry(const char *tablename, const void *ip_void, 134static bool
137 const struct xt_match *match, 135policy_mt_check(const char *tablename, const void *ip_void,
138 void *matchinfo, unsigned int hook_mask) 136 const struct xt_match *match, void *matchinfo,
137 unsigned int hook_mask)
139{ 138{
140 struct xt_policy_info *info = matchinfo; 139 struct xt_policy_info *info = matchinfo;
141 140
@@ -144,14 +143,13 @@ static bool checkentry(const char *tablename, const void *ip_void,
144 "outgoing policy selected\n"); 143 "outgoing policy selected\n");
145 return false; 144 return false;
146 } 145 }
147 /* hook values are equal for IPv4 and IPv6 */ 146 if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN)
148 if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
149 && info->flags & XT_POLICY_MATCH_OUT) { 147 && info->flags & XT_POLICY_MATCH_OUT) {
150 printk(KERN_ERR "xt_policy: output policy not valid in " 148 printk(KERN_ERR "xt_policy: output policy not valid in "
151 "PRE_ROUTING and INPUT\n"); 149 "PRE_ROUTING and INPUT\n");
152 return false; 150 return false;
153 } 151 }
154 if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) 152 if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT)
155 && info->flags & XT_POLICY_MATCH_IN) { 153 && info->flags & XT_POLICY_MATCH_IN) {
156 printk(KERN_ERR "xt_policy: input policy not valid in " 154 printk(KERN_ERR "xt_policy: input policy not valid in "
157 "POST_ROUTING and OUTPUT\n"); 155 "POST_ROUTING and OUTPUT\n");
@@ -164,37 +162,36 @@ static bool checkentry(const char *tablename, const void *ip_void,
164 return true; 162 return true;
165} 163}
166 164
167static struct xt_match xt_policy_match[] __read_mostly = { 165static struct xt_match policy_mt_reg[] __read_mostly = {
168 { 166 {
169 .name = "policy", 167 .name = "policy",
170 .family = AF_INET, 168 .family = AF_INET,
171 .checkentry = checkentry, 169 .checkentry = policy_mt_check,
172 .match = match, 170 .match = policy_mt,
173 .matchsize = sizeof(struct xt_policy_info), 171 .matchsize = sizeof(struct xt_policy_info),
174 .me = THIS_MODULE, 172 .me = THIS_MODULE,
175 }, 173 },
176 { 174 {
177 .name = "policy", 175 .name = "policy",
178 .family = AF_INET6, 176 .family = AF_INET6,
179 .checkentry = checkentry, 177 .checkentry = policy_mt_check,
180 .match = match, 178 .match = policy_mt,
181 .matchsize = sizeof(struct xt_policy_info), 179 .matchsize = sizeof(struct xt_policy_info),
182 .me = THIS_MODULE, 180 .me = THIS_MODULE,
183 }, 181 },
184}; 182};
185 183
186static int __init init(void) 184static int __init policy_mt_init(void)
187{ 185{
188 return xt_register_matches(xt_policy_match, 186 return xt_register_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
189 ARRAY_SIZE(xt_policy_match));
190} 187}
191 188
192static void __exit fini(void) 189static void __exit policy_mt_exit(void)
193{ 190{
194 xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match)); 191 xt_unregister_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
195} 192}
196 193
197module_init(init); 194module_init(policy_mt_init);
198module_exit(fini); 195module_exit(policy_mt_exit);
199MODULE_ALIAS("ipt_policy"); 196MODULE_ALIAS("ipt_policy");
200MODULE_ALIAS("ip6t_policy"); 197MODULE_ALIAS("ip6t_policy");
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index dae97445b87b..3b021d0c522a 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,16 +11,17 @@
11 11
12MODULE_LICENSE("GPL"); 12MODULE_LICENSE("GPL");
13MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); 13MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
14MODULE_DESCRIPTION("Xtables: countdown quota match");
14MODULE_ALIAS("ipt_quota"); 15MODULE_ALIAS("ipt_quota");
15MODULE_ALIAS("ip6t_quota"); 16MODULE_ALIAS("ip6t_quota");
16 17
17static DEFINE_SPINLOCK(quota_lock); 18static DEFINE_SPINLOCK(quota_lock);
18 19
19static bool 20static bool
20match(const struct sk_buff *skb, 21quota_mt(const struct sk_buff *skb, const struct net_device *in,
21 const struct net_device *in, const struct net_device *out, 22 const struct net_device *out, const struct xt_match *match,
22 const struct xt_match *match, const void *matchinfo, 23 const void *matchinfo, int offset, unsigned int protoff,
23 int offset, unsigned int protoff, bool *hotdrop) 24 bool *hotdrop)
24{ 25{
25 struct xt_quota_info *q = 26 struct xt_quota_info *q =
26 ((const struct xt_quota_info *)matchinfo)->master; 27 ((const struct xt_quota_info *)matchinfo)->master;
@@ -40,9 +41,9 @@ match(const struct sk_buff *skb,
40} 41}
41 42
42static bool 43static bool
43checkentry(const char *tablename, const void *entry, 44quota_mt_check(const char *tablename, const void *entry,
44 const struct xt_match *match, void *matchinfo, 45 const struct xt_match *match, void *matchinfo,
45 unsigned int hook_mask) 46 unsigned int hook_mask)
46{ 47{
47 struct xt_quota_info *q = matchinfo; 48 struct xt_quota_info *q = matchinfo;
48 49
@@ -53,34 +54,34 @@ checkentry(const char *tablename, const void *entry,
53 return true; 54 return true;
54} 55}
55 56
56static struct xt_match xt_quota_match[] __read_mostly = { 57static struct xt_match quota_mt_reg[] __read_mostly = {
57 { 58 {
58 .name = "quota", 59 .name = "quota",
59 .family = AF_INET, 60 .family = AF_INET,
60 .checkentry = checkentry, 61 .checkentry = quota_mt_check,
61 .match = match, 62 .match = quota_mt,
62 .matchsize = sizeof(struct xt_quota_info), 63 .matchsize = sizeof(struct xt_quota_info),
63 .me = THIS_MODULE 64 .me = THIS_MODULE
64 }, 65 },
65 { 66 {
66 .name = "quota", 67 .name = "quota",
67 .family = AF_INET6, 68 .family = AF_INET6,
68 .checkentry = checkentry, 69 .checkentry = quota_mt_check,
69 .match = match, 70 .match = quota_mt,
70 .matchsize = sizeof(struct xt_quota_info), 71 .matchsize = sizeof(struct xt_quota_info),
71 .me = THIS_MODULE 72 .me = THIS_MODULE
72 }, 73 },
73}; 74};
74 75
75static int __init xt_quota_init(void) 76static int __init quota_mt_init(void)
76{ 77{
77 return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); 78 return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
78} 79}
79 80
80static void __exit xt_quota_fini(void) 81static void __exit quota_mt_exit(void)
81{ 82{
82 xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); 83 xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
83} 84}
84 85
85module_init(xt_quota_init); 86module_init(quota_mt_init);
86module_exit(xt_quota_fini); 87module_exit(quota_mt_exit);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
new file mode 100644
index 000000000000..fdb86a515146
--- /dev/null
+++ b/net/netfilter/xt_rateest.c
@@ -0,0 +1,178 @@
1/*
2 * (C) 2007 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/gen_stats.h>
11
12#include <linux/netfilter/x_tables.h>
13#include <linux/netfilter/xt_rateest.h>
14#include <net/netfilter/xt_rateest.h>
15
16
17static bool xt_rateest_mt(const struct sk_buff *skb,
18 const struct net_device *in,
19 const struct net_device *out,
20 const struct xt_match *match,
21 const void *matchinfo,
22 int offset,
23 unsigned int protoff,
24 bool *hotdrop)
25{
26 const struct xt_rateest_match_info *info = matchinfo;
27 struct gnet_stats_rate_est *r;
28 u_int32_t bps1, bps2, pps1, pps2;
29 bool ret = true;
30
31 spin_lock_bh(&info->est1->lock);
32 r = &info->est1->rstats;
33 if (info->flags & XT_RATEEST_MATCH_DELTA) {
34 bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
35 pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
36 } else {
37 bps1 = r->bps;
38 pps1 = r->pps;
39 }
40 spin_unlock_bh(&info->est1->lock);
41
42 if (info->flags & XT_RATEEST_MATCH_ABS) {
43 bps2 = info->bps2;
44 pps2 = info->pps2;
45 } else {
46 spin_lock_bh(&info->est2->lock);
47 r = &info->est2->rstats;
48 if (info->flags & XT_RATEEST_MATCH_DELTA) {
49 bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
50 pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
51 } else {
52 bps2 = r->bps;
53 pps2 = r->pps;
54 }
55 spin_unlock_bh(&info->est2->lock);
56 }
57
58 switch (info->mode) {
59 case XT_RATEEST_MATCH_LT:
60 if (info->flags & XT_RATEEST_MATCH_BPS)
61 ret &= bps1 < bps2;
62 if (info->flags & XT_RATEEST_MATCH_PPS)
63 ret &= pps1 < pps2;
64 break;
65 case XT_RATEEST_MATCH_GT:
66 if (info->flags & XT_RATEEST_MATCH_BPS)
67 ret &= bps1 > bps2;
68 if (info->flags & XT_RATEEST_MATCH_PPS)
69 ret &= pps1 > pps2;
70 break;
71 case XT_RATEEST_MATCH_EQ:
72 if (info->flags & XT_RATEEST_MATCH_BPS)
73 ret &= bps1 == bps2;
74 if (info->flags & XT_RATEEST_MATCH_PPS)
 75 ret &= pps1 == pps2;
76 break;
77 }
78
79 ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
80 return ret;
81}
82
83static bool xt_rateest_mt_checkentry(const char *tablename,
84 const void *ip,
85 const struct xt_match *match,
86 void *matchinfo,
87 unsigned int hook_mask)
88{
89 struct xt_rateest_match_info *info = (void *)matchinfo;
90 struct xt_rateest *est1, *est2;
91
92 if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
93 XT_RATEEST_MATCH_REL)) != 1)
94 goto err1;
95
96 if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)))
97 goto err1;
98
99 switch (info->mode) {
100 case XT_RATEEST_MATCH_EQ:
101 case XT_RATEEST_MATCH_LT:
102 case XT_RATEEST_MATCH_GT:
103 break;
104 default:
105 goto err1;
106 }
107
108 est1 = xt_rateest_lookup(info->name1);
109 if (!est1)
110 goto err1;
111
112 if (info->flags & XT_RATEEST_MATCH_REL) {
113 est2 = xt_rateest_lookup(info->name2);
114 if (!est2)
115 goto err2;
116 } else
117 est2 = NULL;
118
119
120 info->est1 = est1;
121 info->est2 = est2;
122 return true;
123
124err2:
125 xt_rateest_put(est1);
126err1:
127 return false;
128}
129
130static void xt_rateest_mt_destroy(const struct xt_match *match,
131 void *matchinfo)
132{
133 struct xt_rateest_match_info *info = (void *)matchinfo;
134
135 xt_rateest_put(info->est1);
136 if (info->est2)
137 xt_rateest_put(info->est2);
138}
139
140static struct xt_match xt_rateest_match[] __read_mostly = {
141 {
142 .family = AF_INET,
143 .name = "rateest",
144 .match = xt_rateest_mt,
145 .checkentry = xt_rateest_mt_checkentry,
146 .destroy = xt_rateest_mt_destroy,
147 .matchsize = sizeof(struct xt_rateest_match_info),
148 .me = THIS_MODULE,
149 },
150 {
151 .family = AF_INET6,
152 .name = "rateest",
153 .match = xt_rateest_mt,
154 .checkentry = xt_rateest_mt_checkentry,
155 .destroy = xt_rateest_mt_destroy,
156 .matchsize = sizeof(struct xt_rateest_match_info),
157 .me = THIS_MODULE,
158 },
159};
160
161static int __init xt_rateest_mt_init(void)
162{
163 return xt_register_matches(xt_rateest_match,
164 ARRAY_SIZE(xt_rateest_match));
165}
166
167static void __exit xt_rateest_mt_fini(void)
168{
169 xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match));
170}
171
172MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
173MODULE_LICENSE("GPL");
174MODULE_DESCRIPTION("xtables rate estimator match");
175MODULE_ALIAS("ipt_rateest");
176MODULE_ALIAS("ip6t_rateest");
177module_init(xt_rateest_mt_init);
178module_exit(xt_rateest_mt_fini);
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index cc3e76d77a99..7df1627c536f 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -18,18 +18,14 @@
18 18
19MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>"); 19MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21MODULE_DESCRIPTION("X_tables realm match"); 21MODULE_DESCRIPTION("Xtables: Routing realm match");
22MODULE_ALIAS("ipt_realm"); 22MODULE_ALIAS("ipt_realm");
23 23
24static bool 24static bool
25match(const struct sk_buff *skb, 25realm_mt(const struct sk_buff *skb, const struct net_device *in,
26 const struct net_device *in, 26 const struct net_device *out, const struct xt_match *match,
27 const struct net_device *out, 27 const void *matchinfo, int offset, unsigned int protoff,
28 const struct xt_match *match, 28 bool *hotdrop)
29 const void *matchinfo,
30 int offset,
31 unsigned int protoff,
32 bool *hotdrop)
33{ 29{
34 const struct xt_realm_info *info = matchinfo; 30 const struct xt_realm_info *info = matchinfo;
35 const struct dst_entry *dst = skb->dst; 31 const struct dst_entry *dst = skb->dst;
@@ -37,25 +33,25 @@ match(const struct sk_buff *skb,
37 return (info->id == (dst->tclassid & info->mask)) ^ info->invert; 33 return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
38} 34}
39 35
40static struct xt_match realm_match __read_mostly = { 36static struct xt_match realm_mt_reg __read_mostly = {
41 .name = "realm", 37 .name = "realm",
42 .match = match, 38 .match = realm_mt,
43 .matchsize = sizeof(struct xt_realm_info), 39 .matchsize = sizeof(struct xt_realm_info),
44 .hooks = (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) | 40 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
45 (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN), 41 (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
46 .family = AF_INET, 42 .family = AF_INET,
47 .me = THIS_MODULE 43 .me = THIS_MODULE
48}; 44};
49 45
50static int __init xt_realm_init(void) 46static int __init realm_mt_init(void)
51{ 47{
52 return xt_register_match(&realm_match); 48 return xt_register_match(&realm_mt_reg);
53} 49}
54 50
55static void __exit xt_realm_fini(void) 51static void __exit realm_mt_exit(void)
56{ 52{
57 xt_unregister_match(&realm_match); 53 xt_unregister_match(&realm_mt_reg);
58} 54}
59 55
60module_init(xt_realm_init); 56module_init(realm_mt_init);
61module_exit(xt_realm_fini); 57module_exit(realm_mt_exit);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 3358273a47b7..b718ec64333d 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -11,7 +11,7 @@
11 11
12MODULE_LICENSE("GPL"); 12MODULE_LICENSE("GPL");
13MODULE_AUTHOR("Kiran Kumar Immidi"); 13MODULE_AUTHOR("Kiran Kumar Immidi");
14MODULE_DESCRIPTION("Match for SCTP protocol packets"); 14MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
15MODULE_ALIAS("ipt_sctp"); 15MODULE_ALIAS("ipt_sctp");
16MODULE_ALIAS("ip6t_sctp"); 16MODULE_ALIAS("ip6t_sctp");
17 17
@@ -116,14 +116,9 @@ match_packet(const struct sk_buff *skb,
116} 116}
117 117
118static bool 118static bool
119match(const struct sk_buff *skb, 119sctp_mt(const struct sk_buff *skb, const struct net_device *in,
120 const struct net_device *in, 120 const struct net_device *out, const struct xt_match *match,
121 const struct net_device *out, 121 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
122 const struct xt_match *match,
123 const void *matchinfo,
124 int offset,
125 unsigned int protoff,
126 bool *hotdrop)
127{ 122{
128 const struct xt_sctp_info *info = matchinfo; 123 const struct xt_sctp_info *info = matchinfo;
129 sctp_sctphdr_t _sh, *sh; 124 sctp_sctphdr_t _sh, *sh;
@@ -153,11 +148,9 @@ match(const struct sk_buff *skb,
153} 148}
154 149
155static bool 150static bool
156checkentry(const char *tablename, 151sctp_mt_check(const char *tablename, const void *inf,
157 const void *inf, 152 const struct xt_match *match, void *matchinfo,
158 const struct xt_match *match, 153 unsigned int hook_mask)
159 void *matchinfo,
160 unsigned int hook_mask)
161{ 154{
162 const struct xt_sctp_info *info = matchinfo; 155 const struct xt_sctp_info *info = matchinfo;
163 156
@@ -171,12 +164,12 @@ checkentry(const char *tablename,
171 | SCTP_CHUNK_MATCH_ONLY))); 164 | SCTP_CHUNK_MATCH_ONLY)));
172} 165}
173 166
174static struct xt_match xt_sctp_match[] __read_mostly = { 167static struct xt_match sctp_mt_reg[] __read_mostly = {
175 { 168 {
176 .name = "sctp", 169 .name = "sctp",
177 .family = AF_INET, 170 .family = AF_INET,
178 .checkentry = checkentry, 171 .checkentry = sctp_mt_check,
179 .match = match, 172 .match = sctp_mt,
180 .matchsize = sizeof(struct xt_sctp_info), 173 .matchsize = sizeof(struct xt_sctp_info),
181 .proto = IPPROTO_SCTP, 174 .proto = IPPROTO_SCTP,
182 .me = THIS_MODULE 175 .me = THIS_MODULE
@@ -184,23 +177,23 @@ static struct xt_match xt_sctp_match[] __read_mostly = {
184 { 177 {
185 .name = "sctp", 178 .name = "sctp",
186 .family = AF_INET6, 179 .family = AF_INET6,
187 .checkentry = checkentry, 180 .checkentry = sctp_mt_check,
188 .match = match, 181 .match = sctp_mt,
189 .matchsize = sizeof(struct xt_sctp_info), 182 .matchsize = sizeof(struct xt_sctp_info),
190 .proto = IPPROTO_SCTP, 183 .proto = IPPROTO_SCTP,
191 .me = THIS_MODULE 184 .me = THIS_MODULE
192 }, 185 },
193}; 186};
194 187
195static int __init xt_sctp_init(void) 188static int __init sctp_mt_init(void)
196{ 189{
197 return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); 190 return xt_register_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
198} 191}
199 192
200static void __exit xt_sctp_fini(void) 193static void __exit sctp_mt_exit(void)
201{ 194{
202 xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); 195 xt_unregister_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
203} 196}
204 197
205module_init(xt_sctp_init); 198module_init(sctp_mt_init);
206module_exit(xt_sctp_fini); 199module_exit(sctp_mt_exit);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e0a528df19a7..a776dc36a193 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,14 +21,10 @@ MODULE_ALIAS("ipt_state");
21MODULE_ALIAS("ip6t_state"); 21MODULE_ALIAS("ip6t_state");
22 22
23static bool 23static bool
24match(const struct sk_buff *skb, 24state_mt(const struct sk_buff *skb, const struct net_device *in,
25 const struct net_device *in, 25 const struct net_device *out, const struct xt_match *match,
26 const struct net_device *out, 26 const void *matchinfo, int offset, unsigned int protoff,
27 const struct xt_match *match, 27 bool *hotdrop)
28 const void *matchinfo,
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 28{
33 const struct xt_state_info *sinfo = matchinfo; 29 const struct xt_state_info *sinfo = matchinfo;
34 enum ip_conntrack_info ctinfo; 30 enum ip_conntrack_info ctinfo;
@@ -44,56 +40,54 @@ match(const struct sk_buff *skb,
44 return (sinfo->statemask & statebit); 40 return (sinfo->statemask & statebit);
45} 41}
46 42
47static bool check(const char *tablename, 43static bool
48 const void *inf, 44state_mt_check(const char *tablename, const void *inf,
49 const struct xt_match *match, 45 const struct xt_match *match, void *matchinfo,
50 void *matchinfo, 46 unsigned int hook_mask)
51 unsigned int hook_mask)
52{ 47{
53 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 48 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
54 printk(KERN_WARNING "can't load conntrack support for " 49 printk(KERN_WARNING "can't load conntrack support for "
55 "proto=%d\n", match->family); 50 "proto=%u\n", match->family);
56 return false; 51 return false;
57 } 52 }
58 return true; 53 return true;
59} 54}
60 55
61static void 56static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
62destroy(const struct xt_match *match, void *matchinfo)
63{ 57{
64 nf_ct_l3proto_module_put(match->family); 58 nf_ct_l3proto_module_put(match->family);
65} 59}
66 60
67static struct xt_match xt_state_match[] __read_mostly = { 61static struct xt_match state_mt_reg[] __read_mostly = {
68 { 62 {
69 .name = "state", 63 .name = "state",
70 .family = AF_INET, 64 .family = AF_INET,
71 .checkentry = check, 65 .checkentry = state_mt_check,
72 .match = match, 66 .match = state_mt,
73 .destroy = destroy, 67 .destroy = state_mt_destroy,
74 .matchsize = sizeof(struct xt_state_info), 68 .matchsize = sizeof(struct xt_state_info),
75 .me = THIS_MODULE, 69 .me = THIS_MODULE,
76 }, 70 },
77 { 71 {
78 .name = "state", 72 .name = "state",
79 .family = AF_INET6, 73 .family = AF_INET6,
80 .checkentry = check, 74 .checkentry = state_mt_check,
81 .match = match, 75 .match = state_mt,
82 .destroy = destroy, 76 .destroy = state_mt_destroy,
83 .matchsize = sizeof(struct xt_state_info), 77 .matchsize = sizeof(struct xt_state_info),
84 .me = THIS_MODULE, 78 .me = THIS_MODULE,
85 }, 79 },
86}; 80};
87 81
88static int __init xt_state_init(void) 82static int __init state_mt_init(void)
89{ 83{
90 return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); 84 return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
91} 85}
92 86
93static void __exit xt_state_fini(void) 87static void __exit state_mt_exit(void)
94{ 88{
95 xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); 89 xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
96} 90}
97 91
98module_init(xt_state_init); 92module_init(state_mt_init);
99module_exit(xt_state_fini); 93module_exit(state_mt_exit);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 4089dae4e286..43133080da7d 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,17 +18,17 @@
18 18
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 20MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
21MODULE_DESCRIPTION("xtables statistical match module"); 21MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
22MODULE_ALIAS("ipt_statistic"); 22MODULE_ALIAS("ipt_statistic");
23MODULE_ALIAS("ip6t_statistic"); 23MODULE_ALIAS("ip6t_statistic");
24 24
25static DEFINE_SPINLOCK(nth_lock); 25static DEFINE_SPINLOCK(nth_lock);
26 26
27static bool 27static bool
28match(const struct sk_buff *skb, 28statistic_mt(const struct sk_buff *skb, const struct net_device *in,
29 const struct net_device *in, const struct net_device *out, 29 const struct net_device *out, const struct xt_match *match,
30 const struct xt_match *match, const void *matchinfo, 30 const void *matchinfo, int offset, unsigned int protoff,
31 int offset, unsigned int protoff, bool *hotdrop) 31 bool *hotdrop)
32{ 32{
33 struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; 33 struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
34 bool ret = info->flags & XT_STATISTIC_INVERT; 34 bool ret = info->flags & XT_STATISTIC_INVERT;
@@ -53,9 +53,9 @@ match(const struct sk_buff *skb,
53} 53}
54 54
55static bool 55static bool
56checkentry(const char *tablename, const void *entry, 56statistic_mt_check(const char *tablename, const void *entry,
57 const struct xt_match *match, void *matchinfo, 57 const struct xt_match *match, void *matchinfo,
58 unsigned int hook_mask) 58 unsigned int hook_mask)
59{ 59{
60 struct xt_statistic_info *info = matchinfo; 60 struct xt_statistic_info *info = matchinfo;
61 61
@@ -66,36 +66,36 @@ checkentry(const char *tablename, const void *entry,
66 return true; 66 return true;
67} 67}
68 68
69static struct xt_match xt_statistic_match[] __read_mostly = { 69static struct xt_match statistic_mt_reg[] __read_mostly = {
70 { 70 {
71 .name = "statistic", 71 .name = "statistic",
72 .family = AF_INET, 72 .family = AF_INET,
73 .checkentry = checkentry, 73 .checkentry = statistic_mt_check,
74 .match = match, 74 .match = statistic_mt,
75 .matchsize = sizeof(struct xt_statistic_info), 75 .matchsize = sizeof(struct xt_statistic_info),
76 .me = THIS_MODULE, 76 .me = THIS_MODULE,
77 }, 77 },
78 { 78 {
79 .name = "statistic", 79 .name = "statistic",
80 .family = AF_INET6, 80 .family = AF_INET6,
81 .checkentry = checkentry, 81 .checkentry = statistic_mt_check,
82 .match = match, 82 .match = statistic_mt,
83 .matchsize = sizeof(struct xt_statistic_info), 83 .matchsize = sizeof(struct xt_statistic_info),
84 .me = THIS_MODULE, 84 .me = THIS_MODULE,
85 }, 85 },
86}; 86};
87 87
88static int __init xt_statistic_init(void) 88static int __init statistic_mt_init(void)
89{ 89{
90 return xt_register_matches(xt_statistic_match, 90 return xt_register_matches(statistic_mt_reg,
91 ARRAY_SIZE(xt_statistic_match)); 91 ARRAY_SIZE(statistic_mt_reg));
92} 92}
93 93
94static void __exit xt_statistic_fini(void) 94static void __exit statistic_mt_exit(void)
95{ 95{
96 xt_unregister_matches(xt_statistic_match, 96 xt_unregister_matches(statistic_mt_reg,
97 ARRAY_SIZE(xt_statistic_match)); 97 ARRAY_SIZE(statistic_mt_reg));
98} 98}
99 99
100module_init(xt_statistic_init); 100module_init(statistic_mt_init);
101module_exit(xt_statistic_fini); 101module_exit(statistic_mt_exit);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 864133442cda..72f694d947f4 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -16,19 +16,16 @@
16#include <linux/textsearch.h> 16#include <linux/textsearch.h>
17 17
18MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>"); 18MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
19MODULE_DESCRIPTION("IP tables string match module"); 19MODULE_DESCRIPTION("Xtables: string-based matching");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21MODULE_ALIAS("ipt_string"); 21MODULE_ALIAS("ipt_string");
22MODULE_ALIAS("ip6t_string"); 22MODULE_ALIAS("ip6t_string");
23 23
24static bool match(const struct sk_buff *skb, 24static bool
25 const struct net_device *in, 25string_mt(const struct sk_buff *skb, const struct net_device *in,
26 const struct net_device *out, 26 const struct net_device *out, const struct xt_match *match,
27 const struct xt_match *match, 27 const void *matchinfo, int offset, unsigned int protoff,
28 const void *matchinfo, 28 bool *hotdrop)
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 29{
33 const struct xt_string_info *conf = matchinfo; 30 const struct xt_string_info *conf = matchinfo;
34 struct ts_state state; 31 struct ts_state state;
@@ -40,13 +37,12 @@ static bool match(const struct sk_buff *skb,
40 != UINT_MAX) ^ conf->invert; 37 != UINT_MAX) ^ conf->invert;
41} 38}
42 39
43#define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) 40#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
44 41
45static bool checkentry(const char *tablename, 42static bool
46 const void *ip, 43string_mt_check(const char *tablename, const void *ip,
47 const struct xt_match *match, 44 const struct xt_match *match, void *matchinfo,
48 void *matchinfo, 45 unsigned int hook_mask)
49 unsigned int hook_mask)
50{ 46{
51 struct xt_string_info *conf = matchinfo; 47 struct xt_string_info *conf = matchinfo;
52 struct ts_config *ts_conf; 48 struct ts_config *ts_conf;
@@ -68,41 +64,41 @@ static bool checkentry(const char *tablename,
68 return true; 64 return true;
69} 65}
70 66
71static void destroy(const struct xt_match *match, void *matchinfo) 67static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
72{ 68{
73 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); 69 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
74} 70}
75 71
76static struct xt_match xt_string_match[] __read_mostly = { 72static struct xt_match string_mt_reg[] __read_mostly = {
77 { 73 {
78 .name = "string", 74 .name = "string",
79 .family = AF_INET, 75 .family = AF_INET,
80 .checkentry = checkentry, 76 .checkentry = string_mt_check,
81 .match = match, 77 .match = string_mt,
82 .destroy = destroy, 78 .destroy = string_mt_destroy,
83 .matchsize = sizeof(struct xt_string_info), 79 .matchsize = sizeof(struct xt_string_info),
84 .me = THIS_MODULE 80 .me = THIS_MODULE
85 }, 81 },
86 { 82 {
87 .name = "string", 83 .name = "string",
88 .family = AF_INET6, 84 .family = AF_INET6,
89 .checkentry = checkentry, 85 .checkentry = string_mt_check,
90 .match = match, 86 .match = string_mt,
91 .destroy = destroy, 87 .destroy = string_mt_destroy,
92 .matchsize = sizeof(struct xt_string_info), 88 .matchsize = sizeof(struct xt_string_info),
93 .me = THIS_MODULE 89 .me = THIS_MODULE
94 }, 90 },
95}; 91};
96 92
97static int __init xt_string_init(void) 93static int __init string_mt_init(void)
98{ 94{
99 return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); 95 return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
100} 96}
101 97
102static void __exit xt_string_fini(void) 98static void __exit string_mt_exit(void)
103{ 99{
104 xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); 100 xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
105} 101}
106 102
107module_init(xt_string_init); 103module_init(string_mt_init);
108module_exit(xt_string_fini); 104module_exit(string_mt_exit);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 84d401bfafad..d7a5b27fe81e 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -20,19 +20,15 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
23MODULE_DESCRIPTION("iptables TCP MSS match module"); 23MODULE_DESCRIPTION("Xtables: TCP MSS match");
24MODULE_ALIAS("ipt_tcpmss"); 24MODULE_ALIAS("ipt_tcpmss");
25MODULE_ALIAS("ip6t_tcpmss"); 25MODULE_ALIAS("ip6t_tcpmss");
26 26
27static bool 27static bool
28match(const struct sk_buff *skb, 28tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
29 const struct net_device *in, 29 const struct net_device *out, const struct xt_match *match,
30 const struct net_device *out, 30 const void *matchinfo, int offset, unsigned int protoff,
31 const struct xt_match *match, 31 bool *hotdrop)
32 const void *matchinfo,
33 int offset,
34 unsigned int protoff,
35 bool *hotdrop)
36{ 32{
37 const struct xt_tcpmss_match_info *info = matchinfo; 33 const struct xt_tcpmss_match_info *info = matchinfo;
38 struct tcphdr _tcph, *th; 34 struct tcphdr _tcph, *th;
@@ -82,11 +78,11 @@ dropit:
82 return false; 78 return false;
83} 79}
84 80
85static struct xt_match xt_tcpmss_match[] __read_mostly = { 81static struct xt_match tcpmss_mt_reg[] __read_mostly = {
86 { 82 {
87 .name = "tcpmss", 83 .name = "tcpmss",
88 .family = AF_INET, 84 .family = AF_INET,
89 .match = match, 85 .match = tcpmss_mt,
90 .matchsize = sizeof(struct xt_tcpmss_match_info), 86 .matchsize = sizeof(struct xt_tcpmss_match_info),
91 .proto = IPPROTO_TCP, 87 .proto = IPPROTO_TCP,
92 .me = THIS_MODULE, 88 .me = THIS_MODULE,
@@ -94,23 +90,22 @@ static struct xt_match xt_tcpmss_match[] __read_mostly = {
94 { 90 {
95 .name = "tcpmss", 91 .name = "tcpmss",
96 .family = AF_INET6, 92 .family = AF_INET6,
97 .match = match, 93 .match = tcpmss_mt,
98 .matchsize = sizeof(struct xt_tcpmss_match_info), 94 .matchsize = sizeof(struct xt_tcpmss_match_info),
99 .proto = IPPROTO_TCP, 95 .proto = IPPROTO_TCP,
100 .me = THIS_MODULE, 96 .me = THIS_MODULE,
101 }, 97 },
102}; 98};
103 99
104static int __init xt_tcpmss_init(void) 100static int __init tcpmss_mt_init(void)
105{ 101{
106 return xt_register_matches(xt_tcpmss_match, 102 return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
107 ARRAY_SIZE(xt_tcpmss_match));
108} 103}
109 104
110static void __exit xt_tcpmss_fini(void) 105static void __exit tcpmss_mt_exit(void)
111{ 106{
112 xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match)); 107 xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
113} 108}
114 109
115module_init(xt_tcpmss_init); 110module_init(tcpmss_mt_init);
116module_exit(xt_tcpmss_fini); 111module_exit(tcpmss_mt_exit);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 223f9bded672..4fa3b669f691 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -10,7 +10,7 @@
10#include <linux/netfilter_ipv4/ip_tables.h> 10#include <linux/netfilter_ipv4/ip_tables.h>
11#include <linux/netfilter_ipv6/ip6_tables.h> 11#include <linux/netfilter_ipv6/ip6_tables.h>
12 12
13MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6"); 13MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match");
14MODULE_LICENSE("GPL"); 14MODULE_LICENSE("GPL");
15MODULE_ALIAS("xt_tcp"); 15MODULE_ALIAS("xt_tcp");
16MODULE_ALIAS("xt_udp"); 16MODULE_ALIAS("xt_udp");
@@ -68,14 +68,9 @@ tcp_find_option(u_int8_t option,
68} 68}
69 69
70static bool 70static bool
71tcp_match(const struct sk_buff *skb, 71tcp_mt(const struct sk_buff *skb, const struct net_device *in,
72 const struct net_device *in, 72 const struct net_device *out, const struct xt_match *match,
73 const struct net_device *out, 73 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
74 const struct xt_match *match,
75 const void *matchinfo,
76 int offset,
77 unsigned int protoff,
78 bool *hotdrop)
79{ 74{
80 struct tcphdr _tcph, *th; 75 struct tcphdr _tcph, *th;
81 const struct xt_tcp *tcpinfo = matchinfo; 76 const struct xt_tcp *tcpinfo = matchinfo;
@@ -134,11 +129,9 @@ tcp_match(const struct sk_buff *skb,
134 129
135/* Called when user tries to insert an entry of this type. */ 130/* Called when user tries to insert an entry of this type. */
136static bool 131static bool
137tcp_checkentry(const char *tablename, 132tcp_mt_check(const char *tablename, const void *info,
138 const void *info, 133 const struct xt_match *match, void *matchinfo,
139 const struct xt_match *match, 134 unsigned int hook_mask)
140 void *matchinfo,
141 unsigned int hook_mask)
142{ 135{
143 const struct xt_tcp *tcpinfo = matchinfo; 136 const struct xt_tcp *tcpinfo = matchinfo;
144 137
@@ -147,14 +140,9 @@ tcp_checkentry(const char *tablename,
147} 140}
148 141
149static bool 142static bool
150udp_match(const struct sk_buff *skb, 143udp_mt(const struct sk_buff *skb, const struct net_device *in,
151 const struct net_device *in, 144 const struct net_device *out, const struct xt_match *match,
152 const struct net_device *out, 145 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
153 const struct xt_match *match,
154 const void *matchinfo,
155 int offset,
156 unsigned int protoff,
157 bool *hotdrop)
158{ 146{
159 struct udphdr _udph, *uh; 147 struct udphdr _udph, *uh;
160 const struct xt_udp *udpinfo = matchinfo; 148 const struct xt_udp *udpinfo = matchinfo;
@@ -182,11 +170,9 @@ udp_match(const struct sk_buff *skb,
182 170
183/* Called when user tries to insert an entry of this type. */ 171/* Called when user tries to insert an entry of this type. */
184static bool 172static bool
185udp_checkentry(const char *tablename, 173udp_mt_check(const char *tablename, const void *info,
186 const void *info, 174 const struct xt_match *match, void *matchinfo,
187 const struct xt_match *match, 175 unsigned int hook_mask)
188 void *matchinfo,
189 unsigned int hook_mask)
190{ 176{
191 const struct xt_udp *udpinfo = matchinfo; 177 const struct xt_udp *udpinfo = matchinfo;
192 178
@@ -194,12 +180,12 @@ udp_checkentry(const char *tablename,
194 return !(udpinfo->invflags & ~XT_UDP_INV_MASK); 180 return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
195} 181}
196 182
197static struct xt_match xt_tcpudp_match[] __read_mostly = { 183static struct xt_match tcpudp_mt_reg[] __read_mostly = {
198 { 184 {
199 .name = "tcp", 185 .name = "tcp",
200 .family = AF_INET, 186 .family = AF_INET,
201 .checkentry = tcp_checkentry, 187 .checkentry = tcp_mt_check,
202 .match = tcp_match, 188 .match = tcp_mt,
203 .matchsize = sizeof(struct xt_tcp), 189 .matchsize = sizeof(struct xt_tcp),
204 .proto = IPPROTO_TCP, 190 .proto = IPPROTO_TCP,
205 .me = THIS_MODULE, 191 .me = THIS_MODULE,
@@ -207,8 +193,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
207 { 193 {
208 .name = "tcp", 194 .name = "tcp",
209 .family = AF_INET6, 195 .family = AF_INET6,
210 .checkentry = tcp_checkentry, 196 .checkentry = tcp_mt_check,
211 .match = tcp_match, 197 .match = tcp_mt,
212 .matchsize = sizeof(struct xt_tcp), 198 .matchsize = sizeof(struct xt_tcp),
213 .proto = IPPROTO_TCP, 199 .proto = IPPROTO_TCP,
214 .me = THIS_MODULE, 200 .me = THIS_MODULE,
@@ -216,8 +202,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
216 { 202 {
217 .name = "udp", 203 .name = "udp",
218 .family = AF_INET, 204 .family = AF_INET,
219 .checkentry = udp_checkentry, 205 .checkentry = udp_mt_check,
220 .match = udp_match, 206 .match = udp_mt,
221 .matchsize = sizeof(struct xt_udp), 207 .matchsize = sizeof(struct xt_udp),
222 .proto = IPPROTO_UDP, 208 .proto = IPPROTO_UDP,
223 .me = THIS_MODULE, 209 .me = THIS_MODULE,
@@ -225,8 +211,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
225 { 211 {
226 .name = "udp", 212 .name = "udp",
227 .family = AF_INET6, 213 .family = AF_INET6,
228 .checkentry = udp_checkentry, 214 .checkentry = udp_mt_check,
229 .match = udp_match, 215 .match = udp_mt,
230 .matchsize = sizeof(struct xt_udp), 216 .matchsize = sizeof(struct xt_udp),
231 .proto = IPPROTO_UDP, 217 .proto = IPPROTO_UDP,
232 .me = THIS_MODULE, 218 .me = THIS_MODULE,
@@ -234,8 +220,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
234 { 220 {
235 .name = "udplite", 221 .name = "udplite",
236 .family = AF_INET, 222 .family = AF_INET,
237 .checkentry = udp_checkentry, 223 .checkentry = udp_mt_check,
238 .match = udp_match, 224 .match = udp_mt,
239 .matchsize = sizeof(struct xt_udp), 225 .matchsize = sizeof(struct xt_udp),
240 .proto = IPPROTO_UDPLITE, 226 .proto = IPPROTO_UDPLITE,
241 .me = THIS_MODULE, 227 .me = THIS_MODULE,
@@ -243,24 +229,23 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
243 { 229 {
244 .name = "udplite", 230 .name = "udplite",
245 .family = AF_INET6, 231 .family = AF_INET6,
246 .checkentry = udp_checkentry, 232 .checkentry = udp_mt_check,
247 .match = udp_match, 233 .match = udp_mt,
248 .matchsize = sizeof(struct xt_udp), 234 .matchsize = sizeof(struct xt_udp),
249 .proto = IPPROTO_UDPLITE, 235 .proto = IPPROTO_UDPLITE,
250 .me = THIS_MODULE, 236 .me = THIS_MODULE,
251 }, 237 },
252}; 238};
253 239
254static int __init xt_tcpudp_init(void) 240static int __init tcpudp_mt_init(void)
255{ 241{
256 return xt_register_matches(xt_tcpudp_match, 242 return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
257 ARRAY_SIZE(xt_tcpudp_match));
258} 243}
259 244
260static void __exit xt_tcpudp_fini(void) 245static void __exit tcpudp_mt_exit(void)
261{ 246{
262 xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match)); 247 xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
263} 248}
264 249
265module_init(xt_tcpudp_init); 250module_init(tcpudp_mt_init);
266module_exit(xt_tcpudp_fini); 251module_exit(tcpudp_mt_exit);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index ef48bbd93573..e9a8794bc3ab 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * xt_time 2 * xt_time
3 * Copyright © Jan Engelhardt <jengelh@computergmbh.de>, 2007 3 * Copyright © CC Computer Consultants GmbH, 2007
4 * Contact: <jengelh@computergmbh.de>
4 * 5 *
5 * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> 6 * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
6 * This is a module which is used for time matching 7 * This is a module which is used for time matching
@@ -146,11 +147,10 @@ static void localtime_3(struct xtm *r, time_t time)
146 return; 147 return;
147} 148}
148 149
149static bool xt_time_match(const struct sk_buff *skb, 150static bool
150 const struct net_device *in, 151time_mt(const struct sk_buff *skb, const struct net_device *in,
151 const struct net_device *out, 152 const struct net_device *out, const struct xt_match *match,
152 const struct xt_match *match, const void *matchinfo, 153 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
153 int offset, unsigned int protoff, bool *hotdrop)
154{ 154{
155 const struct xt_time_info *info = matchinfo; 155 const struct xt_time_info *info = matchinfo;
156 unsigned int packet_time; 156 unsigned int packet_time;
@@ -169,7 +169,7 @@ static bool xt_time_match(const struct sk_buff *skb,
169 if (skb->tstamp.tv64 == 0) 169 if (skb->tstamp.tv64 == 0)
170 __net_timestamp((struct sk_buff *)skb); 170 __net_timestamp((struct sk_buff *)skb);
171 171
172 stamp = skb->tstamp.tv64; 172 stamp = ktime_to_ns(skb->tstamp);
173 do_div(stamp, NSEC_PER_SEC); 173 do_div(stamp, NSEC_PER_SEC);
174 174
175 if (info->flags & XT_TIME_LOCAL_TZ) 175 if (info->flags & XT_TIME_LOCAL_TZ)
@@ -215,9 +215,10 @@ static bool xt_time_match(const struct sk_buff *skb,
215 return true; 215 return true;
216} 216}
217 217
218static bool xt_time_check(const char *tablename, const void *ip, 218static bool
219 const struct xt_match *match, void *matchinfo, 219time_mt_check(const char *tablename, const void *ip,
220 unsigned int hook_mask) 220 const struct xt_match *match, void *matchinfo,
221 unsigned int hook_mask)
221{ 222{
222 struct xt_time_info *info = matchinfo; 223 struct xt_time_info *info = matchinfo;
223 224
@@ -231,39 +232,39 @@ static bool xt_time_check(const char *tablename, const void *ip,
231 return true; 232 return true;
232} 233}
233 234
234static struct xt_match xt_time_reg[] __read_mostly = { 235static struct xt_match time_mt_reg[] __read_mostly = {
235 { 236 {
236 .name = "time", 237 .name = "time",
237 .family = AF_INET, 238 .family = AF_INET,
238 .match = xt_time_match, 239 .match = time_mt,
239 .matchsize = sizeof(struct xt_time_info), 240 .matchsize = sizeof(struct xt_time_info),
240 .checkentry = xt_time_check, 241 .checkentry = time_mt_check,
241 .me = THIS_MODULE, 242 .me = THIS_MODULE,
242 }, 243 },
243 { 244 {
244 .name = "time", 245 .name = "time",
245 .family = AF_INET6, 246 .family = AF_INET6,
246 .match = xt_time_match, 247 .match = time_mt,
247 .matchsize = sizeof(struct xt_time_info), 248 .matchsize = sizeof(struct xt_time_info),
248 .checkentry = xt_time_check, 249 .checkentry = time_mt_check,
249 .me = THIS_MODULE, 250 .me = THIS_MODULE,
250 }, 251 },
251}; 252};
252 253
253static int __init xt_time_init(void) 254static int __init time_mt_init(void)
254{ 255{
255 return xt_register_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); 256 return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
256} 257}
257 258
258static void __exit xt_time_exit(void) 259static void __exit time_mt_exit(void)
259{ 260{
260 xt_unregister_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); 261 xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
261} 262}
262 263
263module_init(xt_time_init); 264module_init(time_mt_init);
264module_exit(xt_time_exit); 265module_exit(time_mt_exit);
265MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 266MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
266MODULE_DESCRIPTION("netfilter time match"); 267MODULE_DESCRIPTION("Xtables: time-based matching");
267MODULE_LICENSE("GPL"); 268MODULE_LICENSE("GPL");
268MODULE_ALIAS("ipt_time"); 269MODULE_ALIAS("ipt_time");
269MODULE_ALIAS("ip6t_time"); 270MODULE_ALIAS("ip6t_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index bec427915b30..9b8ed390a8e0 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -2,7 +2,8 @@
2 * xt_u32 - kernel module to match u32 packet content 2 * xt_u32 - kernel module to match u32 packet content
3 * 3 *
4 * Original author: Don Cohen <don@isis.cs3-inc.com> 4 * Original author: Don Cohen <don@isis.cs3-inc.com>
5 * © Jan Engelhardt <jengelh@gmx.de>, 2007 5 * (C) CC Computer Consultants GmbH, 2007
6 * Contact: <jengelh@computergmbh.de>
6 */ 7 */
7 8
8#include <linux/module.h> 9#include <linux/module.h>
@@ -87,11 +88,10 @@ static bool u32_match_it(const struct xt_u32 *data,
87 return true; 88 return true;
88} 89}
89 90
90static bool u32_match(const struct sk_buff *skb, 91static bool
91 const struct net_device *in, 92u32_mt(const struct sk_buff *skb, const struct net_device *in,
92 const struct net_device *out, 93 const struct net_device *out, const struct xt_match *match,
93 const struct xt_match *match, const void *matchinfo, 94 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
94 int offset, unsigned int protoff, bool *hotdrop)
95{ 95{
96 const struct xt_u32 *data = matchinfo; 96 const struct xt_u32 *data = matchinfo;
97 bool ret; 97 bool ret;
@@ -100,37 +100,37 @@ static bool u32_match(const struct sk_buff *skb,
100 return ret ^ data->invert; 100 return ret ^ data->invert;
101} 101}
102 102
103static struct xt_match u32_reg[] __read_mostly = { 103static struct xt_match u32_mt_reg[] __read_mostly = {
104 { 104 {
105 .name = "u32", 105 .name = "u32",
106 .family = AF_INET, 106 .family = AF_INET,
107 .match = u32_match, 107 .match = u32_mt,
108 .matchsize = sizeof(struct xt_u32), 108 .matchsize = sizeof(struct xt_u32),
109 .me = THIS_MODULE, 109 .me = THIS_MODULE,
110 }, 110 },
111 { 111 {
112 .name = "u32", 112 .name = "u32",
113 .family = AF_INET6, 113 .family = AF_INET6,
114 .match = u32_match, 114 .match = u32_mt,
115 .matchsize = sizeof(struct xt_u32), 115 .matchsize = sizeof(struct xt_u32),
116 .me = THIS_MODULE, 116 .me = THIS_MODULE,
117 }, 117 },
118}; 118};
119 119
120static int __init xt_u32_init(void) 120static int __init u32_mt_init(void)
121{ 121{
122 return xt_register_matches(u32_reg, ARRAY_SIZE(u32_reg)); 122 return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
123} 123}
124 124
125static void __exit xt_u32_exit(void) 125static void __exit u32_mt_exit(void)
126{ 126{
127 xt_unregister_matches(u32_reg, ARRAY_SIZE(u32_reg)); 127 xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
128} 128}
129 129
130module_init(xt_u32_init); 130module_init(u32_mt_init);
131module_exit(xt_u32_exit); 131module_exit(u32_mt_exit);
132MODULE_AUTHOR("Jan Engelhardt <jengelh@gmx.de>"); 132MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
133MODULE_DESCRIPTION("netfilter u32 match module"); 133MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
134MODULE_LICENSE("GPL"); 134MODULE_LICENSE("GPL");
135MODULE_ALIAS("ipt_u32"); 135MODULE_ALIAS("ipt_u32");
136MODULE_ALIAS("ip6t_u32"); 136MODULE_ALIAS("ip6t_u32");
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index ba0ca8d3f77d..becf91a952ae 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -38,6 +38,7 @@
38#include <net/genetlink.h> 38#include <net/genetlink.h>
39#include <net/netlabel.h> 39#include <net/netlabel.h>
40#include <net/cipso_ipv4.h> 40#include <net/cipso_ipv4.h>
41#include <asm/atomic.h>
41 42
42#include "netlabel_user.h" 43#include "netlabel_user.h"
43#include "netlabel_cipso_v4.h" 44#include "netlabel_cipso_v4.h"
@@ -421,7 +422,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
421 break; 422 break;
422 } 423 }
423 if (ret_val == 0) 424 if (ret_val == 0)
424 netlbl_mgmt_protocount_inc(); 425 atomic_inc(&netlabel_mgmt_protocount);
425 426
426 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, 427 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
427 &audit_info); 428 &audit_info);
@@ -698,7 +699,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
698 &audit_info, 699 &audit_info,
699 netlbl_cipsov4_doi_free); 700 netlbl_cipsov4_doi_free);
700 if (ret_val == 0) 701 if (ret_val == 0)
701 netlbl_mgmt_protocount_dec(); 702 atomic_dec(&netlabel_mgmt_protocount);
702 703
703 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, 704 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
704 &audit_info); 705 &audit_info);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index b3675bd7db33..9a8ea0195c4f 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -54,9 +54,6 @@ struct netlbl_domhsh_tbl {
54 * hash table should be okay */ 54 * hash table should be okay */
55static DEFINE_SPINLOCK(netlbl_domhsh_lock); 55static DEFINE_SPINLOCK(netlbl_domhsh_lock);
56static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; 56static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
57
58/* Default domain mapping */
59static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
60static struct netlbl_dom_map *netlbl_domhsh_def = NULL; 57static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
61 58
62/* 59/*
@@ -109,17 +106,14 @@ static u32 netlbl_domhsh_hash(const char *key)
109/** 106/**
110 * netlbl_domhsh_search - Search for a domain entry 107 * netlbl_domhsh_search - Search for a domain entry
111 * @domain: the domain 108 * @domain: the domain
112 * @def: return default if no match is found
113 * 109 *
114 * Description: 110 * Description:
115 * Searches the domain hash table and returns a pointer to the hash table 111 * Searches the domain hash table and returns a pointer to the hash table
116 * entry if found, otherwise NULL is returned. If @def is non-zero and a 112 * entry if found, otherwise NULL is returned. The caller is responsibile for
117 * match is not found in the domain hash table the default mapping is returned 113 * the rcu hash table locks (i.e. the caller much call rcu_read_[un]lock()).
118 * if it exists. The caller is responsibile for the rcu hash table locks
119 * (i.e. the caller much call rcu_read_[un]lock()).
120 * 114 *
121 */ 115 */
122static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) 116static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
123{ 117{
124 u32 bkt; 118 u32 bkt;
125 struct netlbl_dom_map *iter; 119 struct netlbl_dom_map *iter;
@@ -133,10 +127,31 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
133 return iter; 127 return iter;
134 } 128 }
135 129
136 if (def != 0) { 130 return NULL;
137 iter = rcu_dereference(netlbl_domhsh_def); 131}
138 if (iter != NULL && iter->valid) 132
139 return iter; 133/**
134 * netlbl_domhsh_search_def - Search for a domain entry
135 * @domain: the domain
136 * @def: return default if no match is found
137 *
138 * Description:
139 * Searches the domain hash table and returns a pointer to the hash table
140 * entry if an exact match is found, if an exact match is not present in the
141 * hash table then the default entry is returned if valid otherwise NULL is
142 * returned. The caller is responsibile for the rcu hash table locks
143 * (i.e. the caller much call rcu_read_[un]lock()).
144 *
145 */
146static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
147{
148 struct netlbl_dom_map *entry;
149
150 entry = netlbl_domhsh_search(domain);
151 if (entry == NULL) {
152 entry = rcu_dereference(netlbl_domhsh_def);
153 if (entry != NULL && entry->valid)
154 return entry;
140 } 155 }
141 156
142 return NULL; 157 return NULL;
@@ -221,24 +236,22 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
221 INIT_RCU_HEAD(&entry->rcu); 236 INIT_RCU_HEAD(&entry->rcu);
222 237
223 rcu_read_lock(); 238 rcu_read_lock();
239 spin_lock(&netlbl_domhsh_lock);
224 if (entry->domain != NULL) { 240 if (entry->domain != NULL) {
225 bkt = netlbl_domhsh_hash(entry->domain); 241 bkt = netlbl_domhsh_hash(entry->domain);
226 spin_lock(&netlbl_domhsh_lock); 242 if (netlbl_domhsh_search(entry->domain) == NULL)
227 if (netlbl_domhsh_search(entry->domain, 0) == NULL)
228 list_add_tail_rcu(&entry->list, 243 list_add_tail_rcu(&entry->list,
229 &rcu_dereference(netlbl_domhsh)->tbl[bkt]); 244 &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
230 else 245 else
231 ret_val = -EEXIST; 246 ret_val = -EEXIST;
232 spin_unlock(&netlbl_domhsh_lock);
233 } else { 247 } else {
234 INIT_LIST_HEAD(&entry->list); 248 INIT_LIST_HEAD(&entry->list);
235 spin_lock(&netlbl_domhsh_def_lock);
236 if (rcu_dereference(netlbl_domhsh_def) == NULL) 249 if (rcu_dereference(netlbl_domhsh_def) == NULL)
237 rcu_assign_pointer(netlbl_domhsh_def, entry); 250 rcu_assign_pointer(netlbl_domhsh_def, entry);
238 else 251 else
239 ret_val = -EEXIST; 252 ret_val = -EEXIST;
240 spin_unlock(&netlbl_domhsh_def_lock);
241 } 253 }
254 spin_unlock(&netlbl_domhsh_lock);
242 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); 255 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
243 if (audit_buf != NULL) { 256 if (audit_buf != NULL) {
244 audit_log_format(audit_buf, 257 audit_log_format(audit_buf,
@@ -307,7 +320,10 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
307 struct audit_buffer *audit_buf; 320 struct audit_buffer *audit_buf;
308 321
309 rcu_read_lock(); 322 rcu_read_lock();
310 entry = netlbl_domhsh_search(domain, (domain != NULL ? 0 : 1)); 323 if (domain)
324 entry = netlbl_domhsh_search(domain);
325 else
326 entry = netlbl_domhsh_search_def(domain);
311 if (entry == NULL) 327 if (entry == NULL)
312 goto remove_return; 328 goto remove_return;
313 switch (entry->type) { 329 switch (entry->type) {
@@ -316,23 +332,16 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
316 entry->domain); 332 entry->domain);
317 break; 333 break;
318 } 334 }
319 if (entry != rcu_dereference(netlbl_domhsh_def)) { 335 spin_lock(&netlbl_domhsh_lock);
320 spin_lock(&netlbl_domhsh_lock); 336 if (entry->valid) {
321 if (entry->valid) { 337 entry->valid = 0;
322 entry->valid = 0; 338 if (entry != rcu_dereference(netlbl_domhsh_def))
323 list_del_rcu(&entry->list); 339 list_del_rcu(&entry->list);
324 ret_val = 0; 340 else
325 }
326 spin_unlock(&netlbl_domhsh_lock);
327 } else {
328 spin_lock(&netlbl_domhsh_def_lock);
329 if (entry->valid) {
330 entry->valid = 0;
331 rcu_assign_pointer(netlbl_domhsh_def, NULL); 341 rcu_assign_pointer(netlbl_domhsh_def, NULL);
332 ret_val = 0; 342 ret_val = 0;
333 }
334 spin_unlock(&netlbl_domhsh_def_lock);
335 } 343 }
344 spin_unlock(&netlbl_domhsh_lock);
336 345
337 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); 346 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
338 if (audit_buf != NULL) { 347 if (audit_buf != NULL) {
@@ -377,7 +386,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info)
377 */ 386 */
378struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) 387struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
379{ 388{
380 return netlbl_domhsh_search(domain, 1); 389 return netlbl_domhsh_search_def(domain);
381} 390}
382 391
383/** 392/**
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 4f50949722a9..c69e3e1f05c3 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -34,6 +34,7 @@
34#include <net/netlabel.h> 34#include <net/netlabel.h>
35#include <net/cipso_ipv4.h> 35#include <net/cipso_ipv4.h>
36#include <asm/bug.h> 36#include <asm/bug.h>
37#include <asm/atomic.h>
37 38
38#include "netlabel_domainhash.h" 39#include "netlabel_domainhash.h"
39#include "netlabel_unlabeled.h" 40#include "netlabel_unlabeled.h"
@@ -262,7 +263,7 @@ int netlbl_enabled(void)
262 /* At some point we probably want to expose this mechanism to the user 263 /* At some point we probably want to expose this mechanism to the user
263 * as well so that admins can toggle NetLabel regardless of the 264 * as well so that admins can toggle NetLabel regardless of the
264 * configuration */ 265 * configuration */
265 return (netlbl_mgmt_protocount_value() > 0 ? 1 : 0); 266 return (atomic_read(&netlabel_mgmt_protocount) > 0);
266} 267}
267 268
268/** 269/**
@@ -311,7 +312,7 @@ socket_setattr_return:
311 * @secattr: the security attributes 312 * @secattr: the security attributes
312 * 313 *
313 * Description: 314 * Description:
314 * Examines the given sock to see any NetLabel style labeling has been 315 * Examines the given sock to see if any NetLabel style labeling has been
315 * applied to the sock, if so it parses the socket label and returns the 316 * applied to the sock, if so it parses the socket label and returns the
316 * security attributes in @secattr. Returns zero on success, negative values 317 * security attributes in @secattr. Returns zero on success, negative values
317 * on failure. 318 * on failure.
@@ -319,18 +320,13 @@ socket_setattr_return:
319 */ 320 */
320int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 321int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
321{ 322{
322 int ret_val; 323 return cipso_v4_sock_getattr(sk, secattr);
323
324 ret_val = cipso_v4_sock_getattr(sk, secattr);
325 if (ret_val == 0)
326 return 0;
327
328 return netlbl_unlabel_getattr(secattr);
329} 324}
330 325
331/** 326/**
332 * netlbl_skbuff_getattr - Determine the security attributes of a packet 327 * netlbl_skbuff_getattr - Determine the security attributes of a packet
333 * @skb: the packet 328 * @skb: the packet
329 * @family: protocol family
334 * @secattr: the security attributes 330 * @secattr: the security attributes
335 * 331 *
336 * Description: 332 * Description:
@@ -341,13 +337,14 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
341 * 337 *
342 */ 338 */
343int netlbl_skbuff_getattr(const struct sk_buff *skb, 339int netlbl_skbuff_getattr(const struct sk_buff *skb,
340 u16 family,
344 struct netlbl_lsm_secattr *secattr) 341 struct netlbl_lsm_secattr *secattr)
345{ 342{
346 if (CIPSO_V4_OPTEXIST(skb) && 343 if (CIPSO_V4_OPTEXIST(skb) &&
347 cipso_v4_skbuff_getattr(skb, secattr) == 0) 344 cipso_v4_skbuff_getattr(skb, secattr) == 0)
348 return 0; 345 return 0;
349 346
350 return netlbl_unlabel_getattr(secattr); 347 return netlbl_unlabel_getattr(skb, family, secattr);
351} 348}
352 349
353/** 350/**
@@ -431,6 +428,10 @@ static int __init netlbl_init(void)
431 if (ret_val != 0) 428 if (ret_val != 0)
432 goto init_failure; 429 goto init_failure;
433 430
431 ret_val = netlbl_unlabel_init(NETLBL_UNLHSH_BITSIZE);
432 if (ret_val != 0)
433 goto init_failure;
434
434 ret_val = netlbl_netlink_init(); 435 ret_val = netlbl_netlink_init();
435 if (ret_val != 0) 436 if (ret_val != 0)
436 goto init_failure; 437 goto init_failure;
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 56483377997a..e2258dc3c845 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -37,14 +37,14 @@
37#include <net/genetlink.h> 37#include <net/genetlink.h>
38#include <net/netlabel.h> 38#include <net/netlabel.h>
39#include <net/cipso_ipv4.h> 39#include <net/cipso_ipv4.h>
40#include <asm/atomic.h>
40 41
41#include "netlabel_domainhash.h" 42#include "netlabel_domainhash.h"
42#include "netlabel_user.h" 43#include "netlabel_user.h"
43#include "netlabel_mgmt.h" 44#include "netlabel_mgmt.h"
44 45
45/* NetLabel configured protocol count */ 46/* NetLabel configured protocol counter */
46static DEFINE_SPINLOCK(netlabel_mgmt_protocount_lock); 47atomic_t netlabel_mgmt_protocount = ATOMIC_INIT(0);
47static u32 netlabel_mgmt_protocount = 0;
48 48
49/* Argument struct for netlbl_domhsh_walk() */ 49/* Argument struct for netlbl_domhsh_walk() */
50struct netlbl_domhsh_walk_arg { 50struct netlbl_domhsh_walk_arg {
@@ -71,63 +71,6 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
71}; 71};
72 72
73/* 73/*
74 * NetLabel Misc Managment Functions
75 */
76
77/**
78 * netlbl_mgmt_protocount_inc - Increment the configured labeled protocol count
79 *
80 * Description:
81 * Increment the number of labeled protocol configurations in the current
82 * NetLabel configuration. Keep track of this for use in determining if
83 * NetLabel label enforcement should be active/enabled or not in the LSM.
84 *
85 */
86void netlbl_mgmt_protocount_inc(void)
87{
88 spin_lock(&netlabel_mgmt_protocount_lock);
89 netlabel_mgmt_protocount++;
90 spin_unlock(&netlabel_mgmt_protocount_lock);
91}
92
93/**
94 * netlbl_mgmt_protocount_dec - Decrement the configured labeled protocol count
95 *
96 * Description:
97 * Decrement the number of labeled protocol configurations in the current
98 * NetLabel configuration. Keep track of this for use in determining if
99 * NetLabel label enforcement should be active/enabled or not in the LSM.
100 *
101 */
102void netlbl_mgmt_protocount_dec(void)
103{
104 spin_lock(&netlabel_mgmt_protocount_lock);
105 if (netlabel_mgmt_protocount > 0)
106 netlabel_mgmt_protocount--;
107 spin_unlock(&netlabel_mgmt_protocount_lock);
108}
109
110/**
111 * netlbl_mgmt_protocount_value - Return the number of configured protocols
112 *
113 * Description:
114 * Return the number of labeled protocols in the current NetLabel
115 * configuration. This value is useful in determining if NetLabel label
116 * enforcement should be active/enabled or not in the LSM.
117 *
118 */
119u32 netlbl_mgmt_protocount_value(void)
120{
121 u32 val;
122
123 rcu_read_lock();
124 val = netlabel_mgmt_protocount;
125 rcu_read_unlock();
126
127 return val;
128}
129
130/*
131 * NetLabel Command Handlers 74 * NetLabel Command Handlers
132 */ 75 */
133 76
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index ccb2b3923591..a43bff169d6b 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -32,6 +32,7 @@
32#define _NETLABEL_MGMT_H 32#define _NETLABEL_MGMT_H
33 33
34#include <net/netlabel.h> 34#include <net/netlabel.h>
35#include <asm/atomic.h>
35 36
36/* 37/*
37 * The following NetLabel payloads are supported by the management interface. 38 * The following NetLabel payloads are supported by the management interface.
@@ -168,9 +169,7 @@ enum {
168/* NetLabel protocol functions */ 169/* NetLabel protocol functions */
169int netlbl_mgmt_genl_init(void); 170int netlbl_mgmt_genl_init(void);
170 171
171/* NetLabel misc management functions */ 172/* NetLabel configured protocol reference counter */
172void netlbl_mgmt_protocount_inc(void); 173extern atomic_t netlabel_mgmt_protocount;
173void netlbl_mgmt_protocount_dec(void);
174u32 netlbl_mgmt_protocount_value(void);
175 174
176#endif 175#endif
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 348292450deb..42e81fd8cc49 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12/* 12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007
14 * 14 *
15 * This program is free software; you can redistribute it and/or modify 15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by 16 * it under the terms of the GNU General Public License as published by
@@ -36,22 +36,92 @@
36#include <linux/string.h> 36#include <linux/string.h>
37#include <linux/skbuff.h> 37#include <linux/skbuff.h>
38#include <linux/audit.h> 38#include <linux/audit.h>
39#include <linux/in.h>
40#include <linux/in6.h>
41#include <linux/ip.h>
42#include <linux/ipv6.h>
43#include <linux/notifier.h>
44#include <linux/netdevice.h>
45#include <linux/security.h>
39#include <net/sock.h> 46#include <net/sock.h>
40#include <net/netlink.h> 47#include <net/netlink.h>
41#include <net/genetlink.h> 48#include <net/genetlink.h>
42 49#include <net/ip.h>
50#include <net/ipv6.h>
51#include <net/net_namespace.h>
43#include <net/netlabel.h> 52#include <net/netlabel.h>
44#include <asm/bug.h> 53#include <asm/bug.h>
54#include <asm/atomic.h>
45 55
46#include "netlabel_user.h" 56#include "netlabel_user.h"
47#include "netlabel_domainhash.h" 57#include "netlabel_domainhash.h"
48#include "netlabel_unlabeled.h" 58#include "netlabel_unlabeled.h"
59#include "netlabel_mgmt.h"
60
61/* NOTE: at present we always use init's network namespace since we don't
62 * presently support different namespaces even though the majority of
63 * the functions in this file are "namespace safe" */
64
65/* The unlabeled connection hash table which we use to map network interfaces
66 * and addresses of unlabeled packets to a user specified secid value for the
67 * LSM. The hash table is used to lookup the network interface entry
68 * (struct netlbl_unlhsh_iface) and then the interface entry is used to
69 * lookup an IP address match from an ordered list. If a network interface
70 * match can not be found in the hash table then the default entry
71 * (netlbl_unlhsh_def) is used. The IP address entry list
72 * (struct netlbl_unlhsh_addr) is ordered such that the entries with a
73 * larger netmask come first.
74 */
75struct netlbl_unlhsh_tbl {
76 struct list_head *tbl;
77 u32 size;
78};
79struct netlbl_unlhsh_addr4 {
80 __be32 addr;
81 __be32 mask;
82 u32 secid;
83
84 u32 valid;
85 struct list_head list;
86 struct rcu_head rcu;
87};
88struct netlbl_unlhsh_addr6 {
89 struct in6_addr addr;
90 struct in6_addr mask;
91 u32 secid;
92
93 u32 valid;
94 struct list_head list;
95 struct rcu_head rcu;
96};
97struct netlbl_unlhsh_iface {
98 int ifindex;
99 struct list_head addr4_list;
100 struct list_head addr6_list;
101
102 u32 valid;
103 struct list_head list;
104 struct rcu_head rcu;
105};
106
107/* Argument struct for netlbl_unlhsh_walk() */
108struct netlbl_unlhsh_walk_arg {
109 struct netlink_callback *nl_cb;
110 struct sk_buff *skb;
111 u32 seq;
112};
113
114/* Unlabeled connection hash table */
115/* updates should be so rare that having one spinlock for the entire
116 * hash table should be okay */
117static DEFINE_SPINLOCK(netlbl_unlhsh_lock);
118static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL;
119static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL;
49 120
50/* Accept unlabeled packets flag */ 121/* Accept unlabeled packets flag */
51static DEFINE_SPINLOCK(netlabel_unlabel_acceptflg_lock);
52static u8 netlabel_unlabel_acceptflg = 0; 122static u8 netlabel_unlabel_acceptflg = 0;
53 123
54/* NetLabel Generic NETLINK CIPSOv4 family */ 124/* NetLabel Generic NETLINK unlabeled family */
55static struct genl_family netlbl_unlabel_gnl_family = { 125static struct genl_family netlbl_unlabel_gnl_family = {
56 .id = GENL_ID_GENERATE, 126 .id = GENL_ID_GENERATE,
57 .hdrsize = 0, 127 .hdrsize = 0,
@@ -63,11 +133,841 @@ static struct genl_family netlbl_unlabel_gnl_family = {
63/* NetLabel Netlink attribute policy */ 133/* NetLabel Netlink attribute policy */
64static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { 134static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
65 [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, 135 [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
136 [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY,
137 .len = sizeof(struct in6_addr) },
138 [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY,
139 .len = sizeof(struct in6_addr) },
140 [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY,
141 .len = sizeof(struct in_addr) },
142 [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY,
143 .len = sizeof(struct in_addr) },
144 [NLBL_UNLABEL_A_IFACE] = { .type = NLA_NUL_STRING,
145 .len = IFNAMSIZ - 1 },
146 [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY }
66}; 147};
67 148
68/* 149/*
69 * Helper Functions 150 * Audit Helper Functions
151 */
152
153/**
154 * netlbl_unlabel_audit_addr4 - Audit an IPv4 address
155 * @audit_buf: audit buffer
156 * @dev: network interface
157 * @addr: IP address
158 * @mask: IP address mask
159 *
160 * Description:
161 * Write the IPv4 address and address mask, if necessary, to @audit_buf.
162 *
163 */
164static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
165 const char *dev,
166 __be32 addr, __be32 mask)
167{
168 u32 mask_val = ntohl(mask);
169
170 if (dev != NULL)
171 audit_log_format(audit_buf, " netif=%s", dev);
172 audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr));
173 if (mask_val != 0xffffffff) {
174 u32 mask_len = 0;
175 while (mask_val > 0) {
176 mask_val <<= 1;
177 mask_len++;
178 }
179 audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
180 }
181}
182
183/**
184 * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
185 * @audit_buf: audit buffer
186 * @dev: network interface
187 * @addr: IP address
188 * @mask: IP address mask
189 *
190 * Description:
191 * Write the IPv6 address and address mask, if necessary, to @audit_buf.
192 *
193 */
194static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
195 const char *dev,
196 const struct in6_addr *addr,
197 const struct in6_addr *mask)
198{
199 if (dev != NULL)
200 audit_log_format(audit_buf, " netif=%s", dev);
201 audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr));
202 if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
203 u32 mask_len = 0;
204 u32 mask_val;
205 int iter = -1;
206 while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
207 mask_len += 32;
208 mask_val = ntohl(mask->s6_addr32[iter]);
209 while (mask_val > 0) {
210 mask_val <<= 1;
211 mask_len++;
212 }
213 audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
214 }
215}
216
217/*
218 * Unlabeled Connection Hash Table Functions
219 */
220
221/**
222 * netlbl_unlhsh_free_addr4 - Frees an IPv4 address entry from the hash table
223 * @entry: the entry's RCU field
224 *
225 * Description:
226 * This function is designed to be used as a callback to the call_rcu()
227 * function so that memory allocated to a hash table address entry can be
228 * released safely.
229 *
230 */
231static void netlbl_unlhsh_free_addr4(struct rcu_head *entry)
232{
233 struct netlbl_unlhsh_addr4 *ptr;
234
235 ptr = container_of(entry, struct netlbl_unlhsh_addr4, rcu);
236 kfree(ptr);
237}
238
239#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
240/**
241 * netlbl_unlhsh_free_addr6 - Frees an IPv6 address entry from the hash table
242 * @entry: the entry's RCU field
243 *
244 * Description:
245 * This function is designed to be used as a callback to the call_rcu()
246 * function so that memory allocated to a hash table address entry can be
247 * released safely.
248 *
249 */
250static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
251{
252 struct netlbl_unlhsh_addr6 *ptr;
253
254 ptr = container_of(entry, struct netlbl_unlhsh_addr6, rcu);
255 kfree(ptr);
256}
257#endif /* IPv6 */
258
259/**
260 * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table
261 * @entry: the entry's RCU field
262 *
263 * Description:
264 * This function is designed to be used as a callback to the call_rcu()
265 * function so that memory allocated to a hash table interface entry can be
266 * released safely. It is important to note that this function does not free
267 * the IPv4 and IPv6 address lists contained as part of an interface entry. It
268 * is up to the rest of the code to make sure an interface entry is only freed
269 * once it's address lists are empty.
270 *
271 */
272static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
273{
274 struct netlbl_unlhsh_iface *iface;
275 struct netlbl_unlhsh_addr4 *iter4;
276 struct netlbl_unlhsh_addr4 *tmp4;
277 struct netlbl_unlhsh_addr6 *iter6;
278 struct netlbl_unlhsh_addr6 *tmp6;
279
280 iface = container_of(entry, struct netlbl_unlhsh_iface, rcu);
281
282 /* no need for locks here since we are the only one with access to this
283 * structure */
284
285 list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list)
286 if (iter4->valid) {
287 list_del_rcu(&iter4->list);
288 kfree(iter4);
289 }
290 list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list)
291 if (iter6->valid) {
292 list_del_rcu(&iter6->list);
293 kfree(iter6);
294 }
295 kfree(iface);
296}
297
298/**
299 * netlbl_unlhsh_hash - Hashing function for the hash table
300 * @ifindex: the network interface/device to hash
301 *
302 * Description:
303 * This is the hashing function for the unlabeled hash table, it returns the
304 * bucket number for the given device/interface. The caller is responsible for
305 * calling the rcu_read_[un]lock() functions.
306 *
70 */ 307 */
308static u32 netlbl_unlhsh_hash(int ifindex)
309{
310 /* this is taken _almost_ directly from
311 * security/selinux/netif.c:sel_netif_hasfn() as they do pretty much
312 * the same thing */
313 return ifindex & (rcu_dereference(netlbl_unlhsh)->size - 1);
314}
315
316/**
317 * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry
318 * @addr: IPv4 address
319 * @iface: the network interface entry
320 *
321 * Description:
322 * Searches the IPv4 address list of the network interface specified by @iface.
323 * If a matching address entry is found it is returned, otherwise NULL is
324 * returned. The caller is responsible for calling the rcu_read_[un]lock()
325 * functions.
326 *
327 */
328static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4(
329 __be32 addr,
330 const struct netlbl_unlhsh_iface *iface)
331{
332 struct netlbl_unlhsh_addr4 *iter;
333
334 list_for_each_entry_rcu(iter, &iface->addr4_list, list)
335 if (iter->valid && (addr & iter->mask) == iter->addr)
336 return iter;
337
338 return NULL;
339}
340
341#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
342/**
343 * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry
344 * @addr: IPv6 address
345 * @iface: the network interface entry
346 *
347 * Description:
348 * Searches the IPv6 address list of the network interface specified by @iface.
349 * If a matching address entry is found it is returned, otherwise NULL is
350 * returned. The caller is responsible for calling the rcu_read_[un]lock()
351 * functions.
352 *
353 */
354static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
355 const struct in6_addr *addr,
356 const struct netlbl_unlhsh_iface *iface)
357{
358 struct netlbl_unlhsh_addr6 *iter;
359
360 list_for_each_entry_rcu(iter, &iface->addr6_list, list)
361 if (iter->valid &&
362 ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
363 return iter;
364
365 return NULL;
366}
367#endif /* IPv6 */
368
369/**
370 * netlbl_unlhsh_search_iface - Search for a matching interface entry
371 * @ifindex: the network interface
372 *
373 * Description:
374 * Searches the unlabeled connection hash table and returns a pointer to the
375 * interface entry which matches @ifindex, otherwise NULL is returned. The
376 * caller is responsible for calling the rcu_read_[un]lock() functions.
377 *
378 */
379static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
380{
381 u32 bkt;
382 struct netlbl_unlhsh_iface *iter;
383
384 bkt = netlbl_unlhsh_hash(ifindex);
385 list_for_each_entry_rcu(iter,
386 &rcu_dereference(netlbl_unlhsh)->tbl[bkt],
387 list)
388 if (iter->valid && iter->ifindex == ifindex)
389 return iter;
390
391 return NULL;
392}
393
394/**
395 * netlbl_unlhsh_search_iface_def - Search for a matching interface entry
396 * @ifindex: the network interface
397 *
398 * Description:
399 * Searches the unlabeled connection hash table and returns a pointer to the
400 * interface entry which matches @ifindex. If an exact match can not be found
401 * and there is a valid default entry, the default entry is returned, otherwise
402 * NULL is returned. The caller is responsible for calling the
403 * rcu_read_[un]lock() functions.
404 *
405 */
406static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface_def(int ifindex)
407{
408 struct netlbl_unlhsh_iface *entry;
409
410 entry = netlbl_unlhsh_search_iface(ifindex);
411 if (entry != NULL)
412 return entry;
413
414 entry = rcu_dereference(netlbl_unlhsh_def);
415 if (entry != NULL && entry->valid)
416 return entry;
417
418 return NULL;
419}
420
421/**
422 * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table
423 * @iface: the associated interface entry
424 * @addr: IPv4 address in network byte order
425 * @mask: IPv4 address mask in network byte order
426 * @secid: LSM secid value for entry
427 *
428 * Description:
429 * Add a new address entry into the unlabeled connection hash table using the
430 * interface entry specified by @iface. On success zero is returned, otherwise
431 * a negative value is returned. The caller is responsible for calling the
432 * rcu_read_[un]lock() functions.
433 *
434 */
435static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
436 const struct in_addr *addr,
437 const struct in_addr *mask,
438 u32 secid)
439{
440 struct netlbl_unlhsh_addr4 *entry;
441 struct netlbl_unlhsh_addr4 *iter;
442
443 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
444 if (entry == NULL)
445 return -ENOMEM;
446
447 entry->addr = addr->s_addr & mask->s_addr;
448 entry->mask = mask->s_addr;
449 entry->secid = secid;
450 entry->valid = 1;
451 INIT_RCU_HEAD(&entry->rcu);
452
453 spin_lock(&netlbl_unlhsh_lock);
454 iter = netlbl_unlhsh_search_addr4(entry->addr, iface);
455 if (iter != NULL &&
456 iter->addr == addr->s_addr && iter->mask == mask->s_addr) {
457 spin_unlock(&netlbl_unlhsh_lock);
458 kfree(entry);
459 return -EEXIST;
460 }
461 /* in order to speed up address searches through the list (the common
462 * case) we need to keep the list in order based on the size of the
463 * address mask such that the entry with the widest mask (smallest
464 * numerical value) appears first in the list */
465 list_for_each_entry_rcu(iter, &iface->addr4_list, list)
466 if (iter->valid &&
467 ntohl(entry->mask) > ntohl(iter->mask)) {
468 __list_add_rcu(&entry->list,
469 iter->list.prev,
470 &iter->list);
471 spin_unlock(&netlbl_unlhsh_lock);
472 return 0;
473 }
474 list_add_tail_rcu(&entry->list, &iface->addr4_list);
475 spin_unlock(&netlbl_unlhsh_lock);
476 return 0;
477}
478
479#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
480/**
481 * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table
482 * @iface: the associated interface entry
483 * @addr: IPv6 address in network byte order
484 * @mask: IPv6 address mask in network byte order
485 * @secid: LSM secid value for entry
486 *
487 * Description:
488 * Add a new address entry into the unlabeled connection hash table using the
489 * interface entry specified by @iface. On success zero is returned, otherwise
490 * a negative value is returned. The caller is responsible for calling the
491 * rcu_read_[un]lock() functions.
492 *
493 */
494static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
495 const struct in6_addr *addr,
496 const struct in6_addr *mask,
497 u32 secid)
498{
499 struct netlbl_unlhsh_addr6 *entry;
500 struct netlbl_unlhsh_addr6 *iter;
501
502 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
503 if (entry == NULL)
504 return -ENOMEM;
505
506 ipv6_addr_copy(&entry->addr, addr);
507 entry->addr.s6_addr32[0] &= mask->s6_addr32[0];
508 entry->addr.s6_addr32[1] &= mask->s6_addr32[1];
509 entry->addr.s6_addr32[2] &= mask->s6_addr32[2];
510 entry->addr.s6_addr32[3] &= mask->s6_addr32[3];
511 ipv6_addr_copy(&entry->mask, mask);
512 entry->secid = secid;
513 entry->valid = 1;
514 INIT_RCU_HEAD(&entry->rcu);
515
516 spin_lock(&netlbl_unlhsh_lock);
517 iter = netlbl_unlhsh_search_addr6(&entry->addr, iface);
518 if (iter != NULL &&
519 (ipv6_addr_equal(&iter->addr, addr) &&
520 ipv6_addr_equal(&iter->mask, mask))) {
521 spin_unlock(&netlbl_unlhsh_lock);
522 kfree(entry);
523 return -EEXIST;
524 }
525 /* in order to speed up address searches through the list (the common
526 * case) we need to keep the list in order based on the size of the
527 * address mask such that the entry with the widest mask (smallest
528 * numerical value) appears first in the list */
529 list_for_each_entry_rcu(iter, &iface->addr6_list, list)
530 if (iter->valid &&
531 ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
532 __list_add_rcu(&entry->list,
533 iter->list.prev,
534 &iter->list);
535 spin_unlock(&netlbl_unlhsh_lock);
536 return 0;
537 }
538 list_add_tail_rcu(&entry->list, &iface->addr6_list);
539 spin_unlock(&netlbl_unlhsh_lock);
540 return 0;
541}
542#endif /* IPv6 */
543
544/**
545 * netlbl_unlhsh_add_iface - Adds a new interface entry to the hash table
546 * @ifindex: network interface
547 *
548 * Description:
549 * Add a new, empty, interface entry into the unlabeled connection hash table.
550 * On success a pointer to the new interface entry is returned, on failure NULL
551 * is returned. The caller is responsible for calling the rcu_read_[un]lock()
552 * functions.
553 *
554 */
555static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex)
556{
557 u32 bkt;
558 struct netlbl_unlhsh_iface *iface;
559
560 iface = kzalloc(sizeof(*iface), GFP_ATOMIC);
561 if (iface == NULL)
562 return NULL;
563
564 iface->ifindex = ifindex;
565 INIT_LIST_HEAD(&iface->addr4_list);
566 INIT_LIST_HEAD(&iface->addr6_list);
567 iface->valid = 1;
568 INIT_RCU_HEAD(&iface->rcu);
569
570 spin_lock(&netlbl_unlhsh_lock);
571 if (ifindex > 0) {
572 bkt = netlbl_unlhsh_hash(ifindex);
573 if (netlbl_unlhsh_search_iface(ifindex) != NULL)
574 goto add_iface_failure;
575 list_add_tail_rcu(&iface->list,
576 &rcu_dereference(netlbl_unlhsh)->tbl[bkt]);
577 } else {
578 INIT_LIST_HEAD(&iface->list);
579 if (rcu_dereference(netlbl_unlhsh_def) != NULL)
580 goto add_iface_failure;
581 rcu_assign_pointer(netlbl_unlhsh_def, iface);
582 }
583 spin_unlock(&netlbl_unlhsh_lock);
584
585 return iface;
586
587add_iface_failure:
588 spin_unlock(&netlbl_unlhsh_lock);
589 kfree(iface);
590 return NULL;
591}
592
593/**
594 * netlbl_unlhsh_add - Adds a new entry to the unlabeled connection hash table
595 * @net: network namespace
596 * @dev_name: interface name
597 * @addr: IP address in network byte order
598 * @mask: address mask in network byte order
599 * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6)
600 * @secid: LSM secid value for the entry
601 * @audit_info: NetLabel audit information
602 *
603 * Description:
604 * Adds a new entry to the unlabeled connection hash table. Returns zero on
605 * success, negative values on failure.
606 *
607 */
608static int netlbl_unlhsh_add(struct net *net,
609 const char *dev_name,
610 const void *addr,
611 const void *mask,
612 u32 addr_len,
613 u32 secid,
614 struct netlbl_audit *audit_info)
615{
616 int ret_val;
617 int ifindex;
618 struct net_device *dev;
619 struct netlbl_unlhsh_iface *iface;
620 struct in_addr *addr4, *mask4;
621 struct in6_addr *addr6, *mask6;
622 struct audit_buffer *audit_buf = NULL;
623 char *secctx = NULL;
624 u32 secctx_len;
625
626 if (addr_len != sizeof(struct in_addr) &&
627 addr_len != sizeof(struct in6_addr))
628 return -EINVAL;
629
630 rcu_read_lock();
631 if (dev_name != NULL) {
632 dev = dev_get_by_name(net, dev_name);
633 if (dev == NULL) {
634 ret_val = -ENODEV;
635 goto unlhsh_add_return;
636 }
637 ifindex = dev->ifindex;
638 dev_put(dev);
639 iface = netlbl_unlhsh_search_iface(ifindex);
640 } else {
641 ifindex = 0;
642 iface = rcu_dereference(netlbl_unlhsh_def);
643 }
644 if (iface == NULL) {
645 iface = netlbl_unlhsh_add_iface(ifindex);
646 if (iface == NULL) {
647 ret_val = -ENOMEM;
648 goto unlhsh_add_return;
649 }
650 }
651 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD,
652 audit_info);
653 switch (addr_len) {
654 case sizeof(struct in_addr):
655 addr4 = (struct in_addr *)addr;
656 mask4 = (struct in_addr *)mask;
657 ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
658 if (audit_buf != NULL)
659 netlbl_unlabel_audit_addr4(audit_buf,
660 dev_name,
661 addr4->s_addr,
662 mask4->s_addr);
663 break;
664#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
665 case sizeof(struct in6_addr):
666 addr6 = (struct in6_addr *)addr;
667 mask6 = (struct in6_addr *)mask;
668 ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
669 if (audit_buf != NULL)
670 netlbl_unlabel_audit_addr6(audit_buf,
671 dev_name,
672 addr6, mask6);
673 break;
674#endif /* IPv6 */
675 default:
676 ret_val = -EINVAL;
677 }
678 if (ret_val == 0)
679 atomic_inc(&netlabel_mgmt_protocount);
680
681unlhsh_add_return:
682 rcu_read_unlock();
683 if (audit_buf != NULL) {
684 if (security_secid_to_secctx(secid,
685 &secctx,
686 &secctx_len) == 0) {
687 audit_log_format(audit_buf, " sec_obj=%s", secctx);
688 security_release_secctx(secctx, secctx_len);
689 }
690 audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
691 audit_log_end(audit_buf);
692 }
693 return ret_val;
694}
695
696/**
697 * netlbl_unlhsh_remove_addr4 - Remove an IPv4 address entry
698 * @net: network namespace
699 * @iface: interface entry
700 * @addr: IP address
701 * @mask: IP address mask
702 * @audit_info: NetLabel audit information
703 *
704 * Description:
705 * Remove an IP address entry from the unlabeled connection hash table.
706 * Returns zero on success, negative values on failure. The caller is
707 * responsible for calling the rcu_read_[un]lock() functions.
708 *
709 */
710static int netlbl_unlhsh_remove_addr4(struct net *net,
711 struct netlbl_unlhsh_iface *iface,
712 const struct in_addr *addr,
713 const struct in_addr *mask,
714 struct netlbl_audit *audit_info)
715{
716 int ret_val = -ENOENT;
717 struct netlbl_unlhsh_addr4 *entry;
718 struct audit_buffer *audit_buf = NULL;
719 struct net_device *dev;
720 char *secctx = NULL;
721 u32 secctx_len;
722
723 spin_lock(&netlbl_unlhsh_lock);
724 entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface);
725 if (entry != NULL &&
726 entry->addr == addr->s_addr && entry->mask == mask->s_addr) {
727 entry->valid = 0;
728 list_del_rcu(&entry->list);
729 ret_val = 0;
730 }
731 spin_unlock(&netlbl_unlhsh_lock);
732
733 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
734 audit_info);
735 if (audit_buf != NULL) {
736 dev = dev_get_by_index(net, iface->ifindex);
737 netlbl_unlabel_audit_addr4(audit_buf,
738 (dev != NULL ? dev->name : NULL),
739 entry->addr, entry->mask);
740 if (dev != NULL)
741 dev_put(dev);
742 if (security_secid_to_secctx(entry->secid,
743 &secctx,
744 &secctx_len) == 0) {
745 audit_log_format(audit_buf, " sec_obj=%s", secctx);
746 security_release_secctx(secctx, secctx_len);
747 }
748 audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
749 audit_log_end(audit_buf);
750 }
751
752 if (ret_val == 0)
753 call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4);
754 return ret_val;
755}
756
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/**
 * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry
 * @net: network namespace
 * @iface: interface entry
 * @addr: IP address
 * @mask: IP address mask
 * @audit_info: NetLabel audit information
 *
 * Description:
 * Remove an IP address entry from the unlabeled connection hash table.
 * Returns zero on success, negative values on failure.  The caller is
 * responsible for calling the rcu_read_[un]lock() functions.
 *
 */
static int netlbl_unlhsh_remove_addr6(struct net *net,
				      struct netlbl_unlhsh_iface *iface,
				      const struct in6_addr *addr,
				      const struct in6_addr *mask,
				      struct netlbl_audit *audit_info)
{
	int ret_val = -ENOENT;
	struct netlbl_unlhsh_addr6 *entry;
	struct audit_buffer *audit_buf = NULL;
	struct net_device *dev;
	char *secctx = NULL;
	u32 secctx_len;

	/* unlink the entry under the writer lock; the actual free is
	 * deferred until the RCU grace period ends (see call_rcu below) */
	spin_lock(&netlbl_unlhsh_lock);
	entry = netlbl_unlhsh_search_addr6(addr, iface);
	if (entry != NULL &&
	    (ipv6_addr_equal(&entry->addr, addr) &&
	     ipv6_addr_equal(&entry->mask, mask))) {
		entry->valid = 0;
		list_del_rcu(&entry->list);
		ret_val = 0;
	}
	spin_unlock(&netlbl_unlhsh_lock);

	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
					      audit_info);
	if (audit_buf != NULL) {
		dev = dev_get_by_index(net, iface->ifindex);
		netlbl_unlabel_audit_addr6(audit_buf,
					   (dev != NULL ? dev->name : NULL),
					   addr, mask);
		if (dev != NULL)
			dev_put(dev);
		/* BUG FIX: only read entry->secid when an entry was actually
		 * found and removed; the old code dereferenced 'entry' here
		 * even when it was NULL (ret_val == -ENOENT) */
		if (entry != NULL &&
		    security_secid_to_secctx(entry->secid,
					     &secctx,
					     &secctx_len) == 0) {
			audit_log_format(audit_buf, " sec_obj=%s", secctx);
			security_release_secctx(secctx, secctx_len);
		}
		audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
		audit_log_end(audit_buf);
	}

	if (ret_val == 0)
		call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6);
	return ret_val;
}
#endif /* IPv6 */
820
/**
 * netlbl_unlhsh_condremove_iface - Remove an interface entry
 * @iface: the interface entry
 *
 * Description:
 * Remove an interface entry from the unlabeled connection hash table if it is
 * empty.  An interface entry is considered to be empty if there are no
 * address entries assigned to it.
 *
 */
static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
{
	struct netlbl_unlhsh_addr4 *iter4;
	struct netlbl_unlhsh_addr6 *iter6;

	/* the spinlock serializes writers; if any address entry is still
	 * valid the interface is not empty and must be left in place */
	spin_lock(&netlbl_unlhsh_lock);
	list_for_each_entry_rcu(iter4, &iface->addr4_list, list)
		if (iter4->valid)
			goto unlhsh_condremove_failure;
	list_for_each_entry_rcu(iter6, &iface->addr6_list, list)
		if (iter6->valid)
			goto unlhsh_condremove_failure;
	iface->valid = 0;
	/* ifindex > 0 is a per-device entry stored in the hash table;
	 * otherwise this is the default entry held outside the table */
	if (iface->ifindex > 0)
		list_del_rcu(&iface->list);
	else
		rcu_assign_pointer(netlbl_unlhsh_def, NULL);
	spin_unlock(&netlbl_unlhsh_lock);

	/* defer the actual free until all RCU readers are done */
	call_rcu(&iface->rcu, netlbl_unlhsh_free_iface);
	return;

unlhsh_condremove_failure:
	spin_unlock(&netlbl_unlhsh_lock);
	return;
}
857
/**
 * netlbl_unlhsh_remove - Remove an entry from the unlabeled hash table
 * @net: network namespace
 * @dev_name: interface name
 * @addr: IP address in network byte order
 * @mask: address mask in network byte order
 * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6)
 * @audit_info: NetLabel audit information
 *
 * Description:
 * Removes an existing entry from the unlabeled connection hash table.
 * Returns zero on success, negative values on failure.
 *
 */
static int netlbl_unlhsh_remove(struct net *net,
				const char *dev_name,
				const void *addr,
				const void *mask,
				u32 addr_len,
				struct netlbl_audit *audit_info)
{
	int ret_val;
	struct net_device *dev;
	struct netlbl_unlhsh_iface *iface;

	/* only IPv4 and IPv6 address sizes are valid */
	if (addr_len != sizeof(struct in_addr) &&
	    addr_len != sizeof(struct in6_addr))
		return -EINVAL;

	rcu_read_lock();
	if (dev_name != NULL) {
		/* the device reference is only needed long enough to map
		 * the name to an ifindex for the hash lookup */
		dev = dev_get_by_name(net, dev_name);
		if (dev == NULL) {
			ret_val = -ENODEV;
			goto unlhsh_remove_return;
		}
		iface = netlbl_unlhsh_search_iface(dev->ifindex);
		dev_put(dev);
	} else
		/* no interface name selects the default (catch-all) entry */
		iface = rcu_dereference(netlbl_unlhsh_def);
	if (iface == NULL) {
		ret_val = -ENOENT;
		goto unlhsh_remove_return;
	}
	/* dispatch on address family via the address length */
	switch (addr_len) {
	case sizeof(struct in_addr):
		ret_val = netlbl_unlhsh_remove_addr4(net,
						     iface, addr, mask,
						     audit_info);
		break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	case sizeof(struct in6_addr):
		ret_val = netlbl_unlhsh_remove_addr6(net,
						     iface, addr, mask,
						     audit_info);
		break;
#endif /* IPv6 */
	default:
		ret_val = -EINVAL;
	}
	if (ret_val == 0) {
		/* drop the interface entry too if it is now empty and keep
		 * the labeled-protocol accounting in sync */
		netlbl_unlhsh_condremove_iface(iface);
		atomic_dec(&netlabel_mgmt_protocount);
	}

unlhsh_remove_return:
	rcu_read_unlock();
	return ret_val;
}
927
928/*
929 * General Helper Functions
930 */
931
/**
 * netlbl_unlhsh_netdev_handler - Network device notification handler
 * @this: notifier block
 * @event: the event
 * @ptr: the network device (cast to void)
 *
 * Description:
 * Handle network device events, although at present all we care about is a
 * network device going away.  In the case of a device going away we clear any
 * related entries from the unlabeled connection hash table.
 *
 */
static int netlbl_unlhsh_netdev_handler(struct notifier_block *this,
					unsigned long event,
					void *ptr)
{
	struct net_device *dev = ptr;
	struct netlbl_unlhsh_iface *iface = NULL;

	/* only devices in the initial network namespace are tracked */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	/* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */
	if (event == NETDEV_DOWN) {
		/* unlink under the writer lock; RCU readers may still hold
		 * references until the grace period ends */
		spin_lock(&netlbl_unlhsh_lock);
		iface = netlbl_unlhsh_search_iface(dev->ifindex);
		if (iface != NULL && iface->valid) {
			iface->valid = 0;
			list_del_rcu(&iface->list);
		} else
			iface = NULL;
		spin_unlock(&netlbl_unlhsh_lock);
	}

	/* free outside the lock, after all RCU readers are done */
	if (iface != NULL)
		call_rcu(&iface->rcu, netlbl_unlhsh_free_iface);

	return NOTIFY_DONE;
}
71 971
72/** 972/**
73 * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag 973 * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag
@@ -84,11 +984,8 @@ static void netlbl_unlabel_acceptflg_set(u8 value,
84 struct audit_buffer *audit_buf; 984 struct audit_buffer *audit_buf;
85 u8 old_val; 985 u8 old_val;
86 986
87 spin_lock(&netlabel_unlabel_acceptflg_lock);
88 old_val = netlabel_unlabel_acceptflg; 987 old_val = netlabel_unlabel_acceptflg;
89 netlabel_unlabel_acceptflg = value; 988 netlabel_unlabel_acceptflg = value;
90 spin_unlock(&netlabel_unlabel_acceptflg_lock);
91
92 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, 989 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW,
93 audit_info); 990 audit_info);
94 if (audit_buf != NULL) { 991 if (audit_buf != NULL) {
@@ -98,6 +995,48 @@ static void netlbl_unlabel_acceptflg_set(u8 value,
98 } 995 }
99} 996}
100 997
998/**
999 * netlbl_unlabel_addrinfo_get - Get the IPv4/6 address information
1000 * @info: the Generic NETLINK info block
1001 * @addr: the IP address
1002 * @mask: the IP address mask
1003 * @len: the address length
1004 *
1005 * Description:
1006 * Examine the Generic NETLINK message and extract the IP address information.
1007 * Returns zero on success, negative values on failure.
1008 *
1009 */
1010static int netlbl_unlabel_addrinfo_get(struct genl_info *info,
1011 void **addr,
1012 void **mask,
1013 u32 *len)
1014{
1015 u32 addr_len;
1016
1017 if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) {
1018 addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]);
1019 if (addr_len != sizeof(struct in_addr) &&
1020 addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK]))
1021 return -EINVAL;
1022 *len = addr_len;
1023 *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]);
1024 *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]);
1025 return 0;
1026 } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) {
1027 addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]);
1028 if (addr_len != sizeof(struct in6_addr) &&
1029 addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK]))
1030 return -EINVAL;
1031 *len = addr_len;
1032 *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]);
1033 *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]);
1034 return 0;
1035 }
1036
1037 return -EINVAL;
1038}
1039
101/* 1040/*
102 * NetLabel Command Handlers 1041 * NetLabel Command Handlers
103 */ 1042 */
@@ -155,11 +1094,9 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
155 goto list_failure; 1094 goto list_failure;
156 } 1095 }
157 1096
158 rcu_read_lock();
159 ret_val = nla_put_u8(ans_skb, 1097 ret_val = nla_put_u8(ans_skb,
160 NLBL_UNLABEL_A_ACPTFLG, 1098 NLBL_UNLABEL_A_ACPTFLG,
161 netlabel_unlabel_acceptflg); 1099 netlabel_unlabel_acceptflg);
162 rcu_read_unlock();
163 if (ret_val != 0) 1100 if (ret_val != 0)
164 goto list_failure; 1101 goto list_failure;
165 1102
@@ -175,11 +1112,489 @@ list_failure:
175 return ret_val; 1112 return ret_val;
176} 1113}
177 1114
/**
 * netlbl_unlabel_staticadd - Handle a STATICADD message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated STATICADD message and add a new unlabeled
 * connection entry to the hash table.  Returns zero on success, negative
 * values on failure.
 *
 */
static int netlbl_unlabel_staticadd(struct sk_buff *skb,
				    struct genl_info *info)
{
	int ret_val;
	char *dev_name;
	void *addr;
	void *mask;
	u32 addr_len;
	u32 secid;
	struct netlbl_audit audit_info;

	/* Don't allow users to add both IPv4 and IPv6 addresses for a
	 * single entry.  However, allow users to create two entries, one each
	 * for IPv4 and IPv6, with the same LSM security context which should
	 * achieve the same result. */
	/* the XOR ('^') requires exactly one complete address/mask pair,
	 * either IPv4 or IPv6, in addition to the mandatory security
	 * context and interface name attributes */
	if (!info->attrs[NLBL_UNLABEL_A_SECCTX] ||
	    !info->attrs[NLBL_UNLABEL_A_IFACE] ||
	    !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^
	      (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
		return -EINVAL;

	netlbl_netlink_auditinfo(skb, &audit_info);

	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
	if (ret_val != 0)
		return ret_val;
	dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]);
	/* map the user supplied security context string to an LSM secid */
	ret_val = security_secctx_to_secid(
		nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]),
		nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]),
		&secid);
	if (ret_val != 0)
		return ret_val;

	return netlbl_unlhsh_add(&init_net,
				 dev_name, addr, mask, addr_len, secid,
				 &audit_info);
}
1166
/**
 * netlbl_unlabel_staticadddef - Handle a STATICADDDEF message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated STATICADDDEF message and add a new default
 * unlabeled connection entry.  Returns zero on success, negative values on
 * failure.
 *
 */
static int netlbl_unlabel_staticadddef(struct sk_buff *skb,
				       struct genl_info *info)
{
	int ret_val;
	void *addr;
	void *mask;
	u32 addr_len;
	u32 secid;
	struct netlbl_audit audit_info;

	/* Don't allow users to add both IPv4 and IPv6 addresses for a
	 * single entry.  However, allow users to create two entries, one each
	 * for IPv4 and IPv6, with the same LSM security context which should
	 * achieve the same result. */
	/* same XOR validation as netlbl_unlabel_staticadd(), minus the
	 * interface attribute since this is the default entry */
	if (!info->attrs[NLBL_UNLABEL_A_SECCTX] ||
	    !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^
	      (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
		return -EINVAL;

	netlbl_netlink_auditinfo(skb, &audit_info);

	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
	if (ret_val != 0)
		return ret_val;
	/* map the user supplied security context string to an LSM secid */
	ret_val = security_secctx_to_secid(
		nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]),
		nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]),
		&secid);
	if (ret_val != 0)
		return ret_val;

	/* a NULL device name selects the default entry */
	return netlbl_unlhsh_add(&init_net,
				 NULL, addr, mask, addr_len, secid,
				 &audit_info);
}
1215
/**
 * netlbl_unlabel_staticremove - Handle a STATICREMOVE message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated STATICREMOVE message and remove the specified
 * unlabeled connection entry.  Returns zero on success, negative values on
 * failure.
 *
 */
static int netlbl_unlabel_staticremove(struct sk_buff *skb,
				       struct genl_info *info)
{
	int ret_val;
	char *dev_name;
	void *addr;
	void *mask;
	u32 addr_len;
	u32 secid;
	struct netlbl_audit audit_info;

	/* See the note in netlbl_unlabel_staticadd() about not allowing both
	 * IPv4 and IPv6 in the same entry. */
	if (!info->attrs[NLBL_UNLABEL_A_IFACE] ||
	    !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^
	      (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
		return -EINVAL;

	netlbl_netlink_auditinfo(skb, &audit_info);

	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
	if (ret_val != 0)
		return ret_val;
	dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]);

	return netlbl_unlhsh_remove(&init_net,
				    dev_name, addr, mask, addr_len,
				    &audit_info);
}
1257
/**
 * netlbl_unlabel_staticremovedef - Handle a STATICREMOVEDEF message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated STATICREMOVEDEF message and remove the default
 * unlabeled connection entry.  Returns zero on success, negative values on
 * failure.
 *
 */
static int netlbl_unlabel_staticremovedef(struct sk_buff *skb,
					  struct genl_info *info)
{
	int ret_val;
	void *addr;
	void *mask;
	u32 addr_len;
	struct netlbl_audit audit_info;

	/* See the note in netlbl_unlabel_staticadd() about not allowing both
	 * IPv4 and IPv6 in the same entry. */
	if (!((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^
	      (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] ||
	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
		return -EINVAL;

	netlbl_netlink_auditinfo(skb, &audit_info);

	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
	if (ret_val != 0)
		return ret_val;

	/* a NULL device name selects the default entry */
	return netlbl_unlhsh_remove(&init_net,
				    NULL, addr, mask, addr_len,
				    &audit_info);
}
1296
1297
1298/**
1299 * netlbl_unlabel_staticlist_gen - Generate messages for STATICLIST[DEF]
1300 * @cmd: command/message
1301 * @iface: the interface entry
1302 * @addr4: the IPv4 address entry
1303 * @addr6: the IPv6 address entry
1304 * @arg: the netlbl_unlhsh_walk_arg structure
1305 *
1306 * Description:
1307 * This function is designed to be used to generate a response for a
1308 * STATICLIST or STATICLISTDEF message. When called either @addr4 or @addr6
1309 * can be specified, not both, the other unspecified entry should be set to
1310 * NULL by the caller. Returns the size of the message on success, negative
1311 * values on failure.
1312 *
1313 */
1314static int netlbl_unlabel_staticlist_gen(u32 cmd,
1315 const struct netlbl_unlhsh_iface *iface,
1316 const struct netlbl_unlhsh_addr4 *addr4,
1317 const struct netlbl_unlhsh_addr6 *addr6,
1318 void *arg)
1319{
1320 int ret_val = -ENOMEM;
1321 struct netlbl_unlhsh_walk_arg *cb_arg = arg;
1322 struct net_device *dev;
1323 void *data;
1324 u32 secid;
1325 char *secctx;
1326 u32 secctx_len;
1327
1328 data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
1329 cb_arg->seq, &netlbl_unlabel_gnl_family,
1330 NLM_F_MULTI, cmd);
1331 if (data == NULL)
1332 goto list_cb_failure;
1333
1334 if (iface->ifindex > 0) {
1335 dev = dev_get_by_index(&init_net, iface->ifindex);
1336 ret_val = nla_put_string(cb_arg->skb,
1337 NLBL_UNLABEL_A_IFACE, dev->name);
1338 dev_put(dev);
1339 if (ret_val != 0)
1340 goto list_cb_failure;
1341 }
1342
1343 if (addr4) {
1344 struct in_addr addr_struct;
1345
1346 addr_struct.s_addr = addr4->addr;
1347 ret_val = nla_put(cb_arg->skb,
1348 NLBL_UNLABEL_A_IPV4ADDR,
1349 sizeof(struct in_addr),
1350 &addr_struct);
1351 if (ret_val != 0)
1352 goto list_cb_failure;
1353
1354 addr_struct.s_addr = addr4->mask;
1355 ret_val = nla_put(cb_arg->skb,
1356 NLBL_UNLABEL_A_IPV4MASK,
1357 sizeof(struct in_addr),
1358 &addr_struct);
1359 if (ret_val != 0)
1360 goto list_cb_failure;
1361
1362 secid = addr4->secid;
1363 } else {
1364 ret_val = nla_put(cb_arg->skb,
1365 NLBL_UNLABEL_A_IPV6ADDR,
1366 sizeof(struct in6_addr),
1367 &addr6->addr);
1368 if (ret_val != 0)
1369 goto list_cb_failure;
1370
1371 ret_val = nla_put(cb_arg->skb,
1372 NLBL_UNLABEL_A_IPV6MASK,
1373 sizeof(struct in6_addr),
1374 &addr6->mask);
1375 if (ret_val != 0)
1376 goto list_cb_failure;
1377
1378 secid = addr6->secid;
1379 }
1380
1381 ret_val = security_secid_to_secctx(secid, &secctx, &secctx_len);
1382 if (ret_val != 0)
1383 goto list_cb_failure;
1384 ret_val = nla_put(cb_arg->skb,
1385 NLBL_UNLABEL_A_SECCTX,
1386 secctx_len,
1387 secctx);
1388 security_release_secctx(secctx, secctx_len);
1389 if (ret_val != 0)
1390 goto list_cb_failure;
1391
1392 cb_arg->seq++;
1393 return genlmsg_end(cb_arg->skb, data);
1394
1395list_cb_failure:
1396 genlmsg_cancel(cb_arg->skb, data);
1397 return ret_val;
1398}
1399
1400/**
1401 * netlbl_unlabel_staticlist - Handle a STATICLIST message
1402 * @skb: the NETLINK buffer
1403 * @cb: the NETLINK callback
1404 *
1405 * Description:
1406 * Process a user generated STATICLIST message and dump the unlabeled
1407 * connection hash table in a form suitable for use in a kernel generated
1408 * STATICLIST message. Returns the length of @skb.
1409 *
1410 */
1411static int netlbl_unlabel_staticlist(struct sk_buff *skb,
1412 struct netlink_callback *cb)
1413{
1414 struct netlbl_unlhsh_walk_arg cb_arg;
1415 u32 skip_bkt = cb->args[0];
1416 u32 skip_chain = cb->args[1];
1417 u32 skip_addr4 = cb->args[2];
1418 u32 skip_addr6 = cb->args[3];
1419 u32 iter_bkt;
1420 u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
1421 struct netlbl_unlhsh_iface *iface;
1422 struct netlbl_unlhsh_addr4 *addr4;
1423 struct netlbl_unlhsh_addr6 *addr6;
1424
1425 cb_arg.nl_cb = cb;
1426 cb_arg.skb = skb;
1427 cb_arg.seq = cb->nlh->nlmsg_seq;
1428
1429 rcu_read_lock();
1430 for (iter_bkt = skip_bkt;
1431 iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
1432 iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
1433 list_for_each_entry_rcu(iface,
1434 &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt],
1435 list) {
1436 if (!iface->valid ||
1437 iter_chain++ < skip_chain)
1438 continue;
1439 list_for_each_entry_rcu(addr4,
1440 &iface->addr4_list,
1441 list) {
1442 if (!addr4->valid || iter_addr4++ < skip_addr4)
1443 continue;
1444 if (netlbl_unlabel_staticlist_gen(
1445 NLBL_UNLABEL_C_STATICLIST,
1446 iface,
1447 addr4,
1448 NULL,
1449 &cb_arg) < 0) {
1450 iter_addr4--;
1451 iter_chain--;
1452 goto unlabel_staticlist_return;
1453 }
1454 }
1455 list_for_each_entry_rcu(addr6,
1456 &iface->addr6_list,
1457 list) {
1458 if (!addr6->valid || iter_addr6++ < skip_addr6)
1459 continue;
1460 if (netlbl_unlabel_staticlist_gen(
1461 NLBL_UNLABEL_C_STATICLIST,
1462 iface,
1463 NULL,
1464 addr6,
1465 &cb_arg) < 0) {
1466 iter_addr6--;
1467 iter_chain--;
1468 goto unlabel_staticlist_return;
1469 }
1470 }
1471 }
1472 }
1473
1474unlabel_staticlist_return:
1475 rcu_read_unlock();
1476 cb->args[0] = skip_bkt;
1477 cb->args[1] = skip_chain;
1478 cb->args[2] = skip_addr4;
1479 cb->args[3] = skip_addr6;
1480 return skb->len;
1481}
1482
1483/**
1484 * netlbl_unlabel_staticlistdef - Handle a STATICLISTDEF message
1485 * @skb: the NETLINK buffer
1486 * @cb: the NETLINK callback
1487 *
1488 * Description:
1489 * Process a user generated STATICLISTDEF message and dump the default
1490 * unlabeled connection entry in a form suitable for use in a kernel generated
1491 * STATICLISTDEF message. Returns the length of @skb.
1492 *
1493 */
1494static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
1495 struct netlink_callback *cb)
1496{
1497 struct netlbl_unlhsh_walk_arg cb_arg;
1498 struct netlbl_unlhsh_iface *iface;
1499 u32 skip_addr4 = cb->args[0];
1500 u32 skip_addr6 = cb->args[1];
1501 u32 iter_addr4 = 0, iter_addr6 = 0;
1502 struct netlbl_unlhsh_addr4 *addr4;
1503 struct netlbl_unlhsh_addr6 *addr6;
1504
1505 cb_arg.nl_cb = cb;
1506 cb_arg.skb = skb;
1507 cb_arg.seq = cb->nlh->nlmsg_seq;
1508
1509 rcu_read_lock();
1510 iface = rcu_dereference(netlbl_unlhsh_def);
1511 if (iface == NULL || !iface->valid)
1512 goto unlabel_staticlistdef_return;
1513
1514 list_for_each_entry_rcu(addr4, &iface->addr4_list, list) {
1515 if (!addr4->valid || iter_addr4++ < skip_addr4)
1516 continue;
1517 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
1518 iface,
1519 addr4,
1520 NULL,
1521 &cb_arg) < 0) {
1522 iter_addr4--;
1523 goto unlabel_staticlistdef_return;
1524 }
1525 }
1526 list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
1527 if (addr6->valid || iter_addr6++ < skip_addr6)
1528 continue;
1529 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
1530 iface,
1531 NULL,
1532 addr6,
1533 &cb_arg) < 0) {
1534 iter_addr6--;
1535 goto unlabel_staticlistdef_return;
1536 }
1537 }
1538
1539unlabel_staticlistdef_return:
1540 rcu_read_unlock();
1541 cb->args[0] = skip_addr4;
1542 cb->args[1] = skip_addr6;
1543 return skb->len;
1544}
178 1545
179/* 1546/*
180 * NetLabel Generic NETLINK Command Definitions 1547 * NetLabel Generic NETLINK Command Definitions
181 */ 1548 */
182 1549
/* add a static label for an interface/address pair (privileged) */
static struct genl_ops netlbl_unlabel_genl_c_staticadd = {
	.cmd = NLBL_UNLABEL_C_STATICADD,
	.flags = GENL_ADMIN_PERM,
	.policy = netlbl_unlabel_genl_policy,
	.doit = netlbl_unlabel_staticadd,
	.dumpit = NULL,
};

/* remove a static label for an interface/address pair (privileged) */
static struct genl_ops netlbl_unlabel_genl_c_staticremove = {
	.cmd = NLBL_UNLABEL_C_STATICREMOVE,
	.flags = GENL_ADMIN_PERM,
	.policy = netlbl_unlabel_genl_policy,
	.doit = netlbl_unlabel_staticremove,
	.dumpit = NULL,
};

/* dump the static label table (unprivileged, dump-only) */
static struct genl_ops netlbl_unlabel_genl_c_staticlist = {
	.cmd = NLBL_UNLABEL_C_STATICLIST,
	.flags = 0,
	.policy = netlbl_unlabel_genl_policy,
	.doit = NULL,
	.dumpit = netlbl_unlabel_staticlist,
};

/* add the default static label entry (privileged) */
static struct genl_ops netlbl_unlabel_genl_c_staticadddef = {
	.cmd = NLBL_UNLABEL_C_STATICADDDEF,
	.flags = GENL_ADMIN_PERM,
	.policy = netlbl_unlabel_genl_policy,
	.doit = netlbl_unlabel_staticadddef,
	.dumpit = NULL,
};

/* remove the default static label entry (privileged) */
static struct genl_ops netlbl_unlabel_genl_c_staticremovedef = {
	.cmd = NLBL_UNLABEL_C_STATICREMOVEDEF,
	.flags = GENL_ADMIN_PERM,
	.policy = netlbl_unlabel_genl_policy,
	.doit = netlbl_unlabel_staticremovedef,
	.dumpit = NULL,
};

/* dump the default static label entry (unprivileged, dump-only) */
static struct genl_ops netlbl_unlabel_genl_c_staticlistdef = {
	.cmd = NLBL_UNLABEL_C_STATICLISTDEF,
	.flags = 0,
	.policy = netlbl_unlabel_genl_policy,
	.doit = NULL,
	.dumpit = netlbl_unlabel_staticlistdef,
};
1597
183static struct genl_ops netlbl_unlabel_genl_c_accept = { 1598static struct genl_ops netlbl_unlabel_genl_c_accept = {
184 .cmd = NLBL_UNLABEL_C_ACCEPT, 1599 .cmd = NLBL_UNLABEL_C_ACCEPT,
185 .flags = GENL_ADMIN_PERM, 1600 .flags = GENL_ADMIN_PERM,
@@ -196,7 +1611,6 @@ static struct genl_ops netlbl_unlabel_genl_c_list = {
196 .dumpit = NULL, 1611 .dumpit = NULL,
197}; 1612};
198 1613
199
200/* 1614/*
201 * NetLabel Generic NETLINK Protocol Functions 1615 * NetLabel Generic NETLINK Protocol Functions
202 */ 1616 */
@@ -218,6 +1632,36 @@ int netlbl_unlabel_genl_init(void)
218 return ret_val; 1632 return ret_val;
219 1633
220 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, 1634 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1635 &netlbl_unlabel_genl_c_staticadd);
1636 if (ret_val != 0)
1637 return ret_val;
1638
1639 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1640 &netlbl_unlabel_genl_c_staticremove);
1641 if (ret_val != 0)
1642 return ret_val;
1643
1644 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1645 &netlbl_unlabel_genl_c_staticlist);
1646 if (ret_val != 0)
1647 return ret_val;
1648
1649 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1650 &netlbl_unlabel_genl_c_staticadddef);
1651 if (ret_val != 0)
1652 return ret_val;
1653
1654 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1655 &netlbl_unlabel_genl_c_staticremovedef);
1656 if (ret_val != 0)
1657 return ret_val;
1658
1659 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
1660 &netlbl_unlabel_genl_c_staticlistdef);
1661 if (ret_val != 0)
1662 return ret_val;
1663
1664 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
221 &netlbl_unlabel_genl_c_accept); 1665 &netlbl_unlabel_genl_c_accept);
222 if (ret_val != 0) 1666 if (ret_val != 0)
223 return ret_val; 1667 return ret_val;
@@ -234,8 +1678,58 @@ int netlbl_unlabel_genl_init(void)
234 * NetLabel KAPI Hooks 1678 * NetLabel KAPI Hooks
235 */ 1679 */
236 1680
/* netdevice notifier used to purge hash entries for departing devices */
static struct notifier_block netlbl_unlhsh_netdev_notifier = {
	.notifier_call = netlbl_unlhsh_netdev_handler,
};
1684
/**
 * netlbl_unlabel_init - Initialize the unlabeled connection hash table
 * @size: the number of bits to use for the hash buckets
 *
 * Description:
 * Initializes the unlabeled connection hash table and registers a network
 * device notification handler.  This function should only be called by the
 * NetLabel subsystem itself during initialization.  Returns zero on success,
 * non-zero values on error.
 *
 */
int netlbl_unlabel_init(u32 size)
{
	u32 iter;
	struct netlbl_unlhsh_tbl *hsh_tbl;

	if (size == 0)
		return -EINVAL;

	hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
	if (hsh_tbl == NULL)
		return -ENOMEM;
	/* @size is a bit count, so the bucket count is 2^size */
	hsh_tbl->size = 1 << size;
	hsh_tbl->tbl = kcalloc(hsh_tbl->size,
			       sizeof(struct list_head),
			       GFP_KERNEL);
	if (hsh_tbl->tbl == NULL) {
		kfree(hsh_tbl);
		return -ENOMEM;
	}
	for (iter = 0; iter < hsh_tbl->size; iter++)
		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);

	/* NOTE(review): the rcu_read_lock() pair around this publish looks
	 * redundant -- the spinlock already serializes writers and nothing
	 * RCU-protected is read here; confirm before removing */
	rcu_read_lock();
	spin_lock(&netlbl_unlhsh_lock);
	rcu_assign_pointer(netlbl_unlhsh, hsh_tbl);
	spin_unlock(&netlbl_unlhsh_lock);
	rcu_read_unlock();

	/* NOTE(review): the return value of register_netdevice_notifier()
	 * is ignored -- confirm this is intentional */
	register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier);

	return 0;
}
1728
237/** 1729/**
238 * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet 1730 * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet
1731 * @skb: the packet
1732 * @family: protocol family
239 * @secattr: the security attributes 1733 * @secattr: the security attributes
240 * 1734 *
241 * Description: 1735 * Description:
@@ -243,19 +1737,52 @@ int netlbl_unlabel_genl_init(void)
243 * them in @secattr. Returns zero on success and negative values on failure. 1737 * them in @secattr. Returns zero on success and negative values on failure.
244 * 1738 *
245 */ 1739 */
246int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) 1740int netlbl_unlabel_getattr(const struct sk_buff *skb,
1741 u16 family,
1742 struct netlbl_lsm_secattr *secattr)
247{ 1743{
248 int ret_val; 1744 struct iphdr *hdr4;
1745 struct ipv6hdr *hdr6;
1746 struct netlbl_unlhsh_addr4 *addr4;
1747 struct netlbl_unlhsh_addr6 *addr6;
1748 struct netlbl_unlhsh_iface *iface;
249 1749
250 rcu_read_lock(); 1750 rcu_read_lock();
251 if (netlabel_unlabel_acceptflg == 1) { 1751 iface = netlbl_unlhsh_search_iface_def(skb->iif);
252 netlbl_secattr_init(secattr); 1752 if (iface == NULL)
253 ret_val = 0; 1753 goto unlabel_getattr_nolabel;
254 } else 1754 switch (family) {
255 ret_val = -ENOMSG; 1755 case PF_INET:
1756 hdr4 = ip_hdr(skb);
1757 addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
1758 if (addr4 == NULL)
1759 goto unlabel_getattr_nolabel;
1760 secattr->attr.secid = addr4->secid;
1761 break;
1762#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1763 case PF_INET6:
1764 hdr6 = ipv6_hdr(skb);
1765 addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
1766 if (addr6 == NULL)
1767 goto unlabel_getattr_nolabel;
1768 secattr->attr.secid = addr6->secid;
1769 break;
1770#endif /* IPv6 */
1771 default:
1772 goto unlabel_getattr_nolabel;
1773 }
256 rcu_read_unlock(); 1774 rcu_read_unlock();
257 1775
258 return ret_val; 1776 secattr->flags |= NETLBL_SECATTR_SECID;
1777 secattr->type = NETLBL_NLTYPE_UNLABELED;
1778 return 0;
1779
1780unlabel_getattr_nolabel:
1781 rcu_read_unlock();
1782 if (netlabel_unlabel_acceptflg == 0)
1783 return -ENOMSG;
1784 secattr->type = NETLBL_NLTYPE_UNLABELED;
1785 return 0;
259} 1786}
260 1787
261/** 1788/**
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index c2917fbb42cf..06b1301ac072 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -36,6 +36,116 @@
36/* 36/*
37 * The following NetLabel payloads are supported by the Unlabeled subsystem. 37 * The following NetLabel payloads are supported by the Unlabeled subsystem.
38 * 38 *
39 * o STATICADD
40 * This message is sent from an application to add a new static label for
41 * incoming unlabeled connections.
42 *
43 * Required attributes:
44 *
45 * NLBL_UNLABEL_A_IFACE
46 * NLBL_UNLABEL_A_SECCTX
47 *
48 * If IPv4 is specified the following attributes are required:
49 *
50 * NLBL_UNLABEL_A_IPV4ADDR
51 * NLBL_UNLABEL_A_IPV4MASK
52 *
53 * If IPv6 is specified the following attributes are required:
54 *
55 * NLBL_UNLABEL_A_IPV6ADDR
56 * NLBL_UNLABEL_A_IPV6MASK
57 *
58 * o STATICREMOVE
59 * This message is sent from an application to remove an existing static
60 * label for incoming unlabeled connections.
61 *
62 * Required attributes:
63 *
64 * NLBL_UNLABEL_A_IFACE
65 *
66 * If IPv4 is specified the following attributes are required:
67 *
68 * NLBL_UNLABEL_A_IPV4ADDR
69 * NLBL_UNLABEL_A_IPV4MASK
70 *
71 * If IPv6 is specified the following attributes are required:
72 *
73 * NLBL_UNLABEL_A_IPV6ADDR
74 * NLBL_UNLABEL_A_IPV6MASK
75 *
76 * o STATICLIST
77 * This message can be sent either from an application or by the kernel in
78 * response to an application generated STATICLIST message. When sent by an
79 * application there is no payload and the NLM_F_DUMP flag should be set.
80 * The kernel should response with a series of the following messages.
81 *
82 * Required attributes:
83 *
84 * NLBL_UNLABEL_A_IFACE
85 * NLBL_UNLABEL_A_SECCTX
86 *
87 * If IPv4 is specified the following attributes are required:
88 *
89 * NLBL_UNLABEL_A_IPV4ADDR
90 * NLBL_UNLABEL_A_IPV4MASK
91 *
92 * If IPv6 is specified the following attributes are required:
93 *
94 * NLBL_UNLABEL_A_IPV6ADDR
95 * NLBL_UNLABEL_A_IPV6MASK
96 *
97 * o STATICADDDEF
98 * This message is sent from an application to set the default static
99 * label for incoming unlabeled connections.
100 *
101 * Required attribute:
102 *
103 * NLBL_UNLABEL_A_SECCTX
104 *
105 * If IPv4 is specified the following attributes are required:
106 *
107 * NLBL_UNLABEL_A_IPV4ADDR
108 * NLBL_UNLABEL_A_IPV4MASK
109 *
110 * If IPv6 is specified the following attributes are required:
111 *
112 * NLBL_UNLABEL_A_IPV6ADDR
113 * NLBL_UNLABEL_A_IPV6MASK
114 *
115 * o STATICREMOVEDEF
116 * This message is sent from an application to remove the existing default
117 * static label for incoming unlabeled connections.
118 *
119 * If IPv4 is specified the following attributes are required:
120 *
121 * NLBL_UNLABEL_A_IPV4ADDR
122 * NLBL_UNLABEL_A_IPV4MASK
123 *
124 * If IPv6 is specified the following attributes are required:
125 *
126 * NLBL_UNLABEL_A_IPV6ADDR
127 * NLBL_UNLABEL_A_IPV6MASK
128 *
129 * o STATICLISTDEF
130 * This message can be sent either from an application or by the kernel in
131 * response to an application generated STATICLISTDEF message. When sent by
132 * an application there is no payload and the NLM_F_DUMP flag should be set.
133 * The kernel should response with the following message.
134 *
135 * Required attribute:
136 *
137 * NLBL_UNLABEL_A_SECCTX
138 *
139 * If IPv4 is specified the following attributes are required:
140 *
141 * NLBL_UNLABEL_A_IPV4ADDR
142 * NLBL_UNLABEL_A_IPV4MASK
143 *
144 * If IPv6 is specified the following attributes are required:
145 *
146 * NLBL_UNLABEL_A_IPV6ADDR
147 * NLBL_UNLABEL_A_IPV6MASK
148 *
39 * o ACCEPT 149 * o ACCEPT
40 * This message is sent from an application to specify if the kernel should 150 * This message is sent from an application to specify if the kernel should
41 * allow unlabled packets to pass if they do not match any of the static 151 * allow unlabled packets to pass if they do not match any of the static
@@ -62,6 +172,12 @@ enum {
62 NLBL_UNLABEL_C_UNSPEC, 172 NLBL_UNLABEL_C_UNSPEC,
63 NLBL_UNLABEL_C_ACCEPT, 173 NLBL_UNLABEL_C_ACCEPT,
64 NLBL_UNLABEL_C_LIST, 174 NLBL_UNLABEL_C_LIST,
175 NLBL_UNLABEL_C_STATICADD,
176 NLBL_UNLABEL_C_STATICREMOVE,
177 NLBL_UNLABEL_C_STATICLIST,
178 NLBL_UNLABEL_C_STATICADDDEF,
179 NLBL_UNLABEL_C_STATICREMOVEDEF,
180 NLBL_UNLABEL_C_STATICLISTDEF,
65 __NLBL_UNLABEL_C_MAX, 181 __NLBL_UNLABEL_C_MAX,
66}; 182};
67#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) 183#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
@@ -73,6 +189,24 @@ enum {
73 /* (NLA_U8) 189 /* (NLA_U8)
74 * if true then unlabeled packets are allowed to pass, else unlabeled 190 * if true then unlabeled packets are allowed to pass, else unlabeled
75 * packets are rejected */ 191 * packets are rejected */
192 NLBL_UNLABEL_A_IPV6ADDR,
193 /* (NLA_BINARY, struct in6_addr)
194 * an IPv6 address */
195 NLBL_UNLABEL_A_IPV6MASK,
196 /* (NLA_BINARY, struct in6_addr)
197 * an IPv6 address mask */
198 NLBL_UNLABEL_A_IPV4ADDR,
199 /* (NLA_BINARY, struct in_addr)
200 * an IPv4 address */
201 NLBL_UNLABEL_A_IPV4MASK,
202 /* (NLA_BINARY, struct in_addr)
203 * and IPv4 address mask */
204 NLBL_UNLABEL_A_IFACE,
205 /* (NLA_NULL_STRING)
206 * network interface */
207 NLBL_UNLABEL_A_SECCTX,
208 /* (NLA_BINARY)
209 * a LSM specific security context */
76 __NLBL_UNLABEL_A_MAX, 210 __NLBL_UNLABEL_A_MAX,
77}; 211};
78#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1) 212#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1)
@@ -80,8 +214,17 @@ enum {
80/* NetLabel protocol functions */ 214/* NetLabel protocol functions */
81int netlbl_unlabel_genl_init(void); 215int netlbl_unlabel_genl_init(void);
82 216
217/* Unlabeled connection hash table size */
218/* XXX - currently this number is an uneducated guess */
219#define NETLBL_UNLHSH_BITSIZE 7
220
221/* General Unlabeled init function */
222int netlbl_unlabel_init(u32 size);
223
83/* Process Unlabeled incoming network packets */ 224/* Process Unlabeled incoming network packets */
84int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr); 225int netlbl_unlabel_getattr(const struct sk_buff *skb,
226 u16 family,
227 struct netlbl_lsm_secattr *secattr);
85 228
86/* Set the default configuration to allow Unlabeled packets */ 229/* Set the default configuration to allow Unlabeled packets */
87int netlbl_unlabel_defconf(void); 230int netlbl_unlabel_defconf(void);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4f994c0fb3f8..1ab0da2632e1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
156 skb_queue_purge(&sk->sk_receive_queue); 156 skb_queue_purge(&sk->sk_receive_queue);
157 157
158 if (!sock_flag(sk, SOCK_DEAD)) { 158 if (!sock_flag(sk, SOCK_DEAD)) {
159 printk("Freeing alive netlink socket %p\n", sk); 159 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
160 return; 160 return;
161 } 161 }
162 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); 162 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
@@ -164,13 +164,14 @@ static void netlink_sock_destruct(struct sock *sk)
164 BUG_TRAP(!nlk_sk(sk)->groups); 164 BUG_TRAP(!nlk_sk(sk)->groups);
165} 165}
166 166
167/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. 167/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
168 * Look, when several writers sleep and reader wakes them up, all but one 168 * SMP. Look, when several writers sleep and reader wakes them up, all but one
169 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 169 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
170 * this, _but_ remember, it adds useless work on UP machines. 170 * this, _but_ remember, it adds useless work on UP machines.
171 */ 171 */
172 172
173static void netlink_table_grab(void) 173static void netlink_table_grab(void)
174 __acquires(nl_table_lock)
174{ 175{
175 write_lock_irq(&nl_table_lock); 176 write_lock_irq(&nl_table_lock);
176 177
@@ -178,7 +179,7 @@ static void netlink_table_grab(void)
178 DECLARE_WAITQUEUE(wait, current); 179 DECLARE_WAITQUEUE(wait, current);
179 180
180 add_wait_queue_exclusive(&nl_table_wait, &wait); 181 add_wait_queue_exclusive(&nl_table_wait, &wait);
181 for(;;) { 182 for (;;) {
182 set_current_state(TASK_UNINTERRUPTIBLE); 183 set_current_state(TASK_UNINTERRUPTIBLE);
183 if (atomic_read(&nl_table_users) == 0) 184 if (atomic_read(&nl_table_users) == 0)
184 break; 185 break;
@@ -192,13 +193,14 @@ static void netlink_table_grab(void)
192 } 193 }
193} 194}
194 195
195static __inline__ void netlink_table_ungrab(void) 196static void netlink_table_ungrab(void)
197 __releases(nl_table_lock)
196{ 198{
197 write_unlock_irq(&nl_table_lock); 199 write_unlock_irq(&nl_table_lock);
198 wake_up(&nl_table_wait); 200 wake_up(&nl_table_wait);
199} 201}
200 202
201static __inline__ void 203static inline void
202netlink_lock_table(void) 204netlink_lock_table(void)
203{ 205{
204 /* read_lock() synchronizes us to netlink_table_grab */ 206 /* read_lock() synchronizes us to netlink_table_grab */
@@ -208,14 +210,15 @@ netlink_lock_table(void)
208 read_unlock(&nl_table_lock); 210 read_unlock(&nl_table_lock);
209} 211}
210 212
211static __inline__ void 213static inline void
212netlink_unlock_table(void) 214netlink_unlock_table(void)
213{ 215{
214 if (atomic_dec_and_test(&nl_table_users)) 216 if (atomic_dec_and_test(&nl_table_users))
215 wake_up(&nl_table_wait); 217 wake_up(&nl_table_wait);
216} 218}
217 219
218static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) 220static inline struct sock *netlink_lookup(struct net *net, int protocol,
221 u32 pid)
219{ 222{
220 struct nl_pid_hash *hash = &nl_table[protocol].hash; 223 struct nl_pid_hash *hash = &nl_table[protocol].hash;
221 struct hlist_head *head; 224 struct hlist_head *head;
@@ -236,13 +239,14 @@ found:
236 return sk; 239 return sk;
237} 240}
238 241
239static inline struct hlist_head *nl_pid_hash_alloc(size_t size) 242static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
240{ 243{
241 if (size <= PAGE_SIZE) 244 if (size <= PAGE_SIZE)
242 return kmalloc(size, GFP_ATOMIC); 245 return kzalloc(size, GFP_ATOMIC);
243 else 246 else
244 return (struct hlist_head *) 247 return (struct hlist_head *)
245 __get_free_pages(GFP_ATOMIC, get_order(size)); 248 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
249 get_order(size));
246} 250}
247 251
248static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) 252static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
@@ -271,11 +275,10 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
271 size *= 2; 275 size *= 2;
272 } 276 }
273 277
274 table = nl_pid_hash_alloc(size); 278 table = nl_pid_hash_zalloc(size);
275 if (!table) 279 if (!table)
276 return 0; 280 return 0;
277 281
278 memset(table, 0, size);
279 otable = hash->table; 282 otable = hash->table;
280 hash->table = table; 283 hash->table = table;
281 hash->mask = mask; 284 hash->mask = mask;
@@ -396,7 +399,7 @@ static int __netlink_create(struct net *net, struct socket *sock,
396 399
397 sock->ops = &netlink_ops; 400 sock->ops = &netlink_ops;
398 401
399 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); 402 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
400 if (!sk) 403 if (!sk)
401 return -ENOMEM; 404 return -ENOMEM;
402 405
@@ -428,7 +431,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
428 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 431 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
429 return -ESOCKTNOSUPPORT; 432 return -ESOCKTNOSUPPORT;
430 433
431 if (protocol<0 || protocol >= MAX_LINKS) 434 if (protocol < 0 || protocol >= MAX_LINKS)
432 return -EPROTONOSUPPORT; 435 return -EPROTONOSUPPORT;
433 436
434 netlink_lock_table(); 437 netlink_lock_table();
@@ -445,7 +448,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
445 cb_mutex = nl_table[protocol].cb_mutex; 448 cb_mutex = nl_table[protocol].cb_mutex;
446 netlink_unlock_table(); 449 netlink_unlock_table();
447 450
448 if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) 451 err = __netlink_create(net, sock, cb_mutex, protocol);
452 if (err < 0)
449 goto out_module; 453 goto out_module;
450 454
451 nlk = nlk_sk(sock->sk); 455 nlk = nlk_sk(sock->sk);
@@ -494,9 +498,12 @@ static int netlink_release(struct socket *sock)
494 498
495 netlink_table_grab(); 499 netlink_table_grab();
496 if (netlink_is_kernel(sk)) { 500 if (netlink_is_kernel(sk)) {
497 kfree(nl_table[sk->sk_protocol].listeners); 501 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
498 nl_table[sk->sk_protocol].module = NULL; 502 if (--nl_table[sk->sk_protocol].registered == 0) {
499 nl_table[sk->sk_protocol].registered = 0; 503 kfree(nl_table[sk->sk_protocol].listeners);
504 nl_table[sk->sk_protocol].module = NULL;
505 nl_table[sk->sk_protocol].registered = 0;
506 }
500 } else if (nlk->subscriptions) 507 } else if (nlk->subscriptions)
501 netlink_update_listeners(sk); 508 netlink_update_listeners(sk);
502 netlink_table_ungrab(); 509 netlink_table_ungrab();
@@ -590,7 +597,7 @@ static int netlink_realloc_groups(struct sock *sk)
590 err = -ENOMEM; 597 err = -ENOMEM;
591 goto out_unlock; 598 goto out_unlock;
592 } 599 }
593 memset((char*)new_groups + NLGRPSZ(nlk->ngroups), 0, 600 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
594 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 601 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
595 602
596 nlk->groups = new_groups; 603 nlk->groups = new_groups;
@@ -600,7 +607,8 @@ static int netlink_realloc_groups(struct sock *sk)
600 return err; 607 return err;
601} 608}
602 609
603static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 610static int netlink_bind(struct socket *sock, struct sockaddr *addr,
611 int addr_len)
604{ 612{
605 struct sock *sk = sock->sk; 613 struct sock *sk = sock->sk;
606 struct net *net = sk->sk_net; 614 struct net *net = sk->sk_net;
@@ -651,7 +659,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
651 int err = 0; 659 int err = 0;
652 struct sock *sk = sock->sk; 660 struct sock *sk = sock->sk;
653 struct netlink_sock *nlk = nlk_sk(sk); 661 struct netlink_sock *nlk = nlk_sk(sk);
654 struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr; 662 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
655 663
656 if (addr->sa_family == AF_UNSPEC) { 664 if (addr->sa_family == AF_UNSPEC) {
657 sk->sk_state = NETLINK_UNCONNECTED; 665 sk->sk_state = NETLINK_UNCONNECTED;
@@ -678,11 +686,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
678 return err; 686 return err;
679} 687}
680 688
681static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) 689static int netlink_getname(struct socket *sock, struct sockaddr *addr,
690 int *addr_len, int peer)
682{ 691{
683 struct sock *sk = sock->sk; 692 struct sock *sk = sock->sk;
684 struct netlink_sock *nlk = nlk_sk(sk); 693 struct netlink_sock *nlk = nlk_sk(sk);
685 struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr; 694 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
686 695
687 nladdr->nl_family = AF_NETLINK; 696 nladdr->nl_family = AF_NETLINK;
688 nladdr->nl_pad = 0; 697 nladdr->nl_pad = 0;
@@ -752,7 +761,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
752 * 1: repeat lookup - reference dropped while waiting for socket memory. 761 * 1: repeat lookup - reference dropped while waiting for socket memory.
753 */ 762 */
754int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, 763int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
755 long timeo, struct sock *ssk) 764 long *timeo, struct sock *ssk)
756{ 765{
757 struct netlink_sock *nlk; 766 struct netlink_sock *nlk;
758 767
@@ -761,7 +770,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
761 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 770 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
762 test_bit(0, &nlk->state)) { 771 test_bit(0, &nlk->state)) {
763 DECLARE_WAITQUEUE(wait, current); 772 DECLARE_WAITQUEUE(wait, current);
764 if (!timeo) { 773 if (!*timeo) {
765 if (!ssk || netlink_is_kernel(ssk)) 774 if (!ssk || netlink_is_kernel(ssk))
766 netlink_overrun(sk); 775 netlink_overrun(sk);
767 sock_put(sk); 776 sock_put(sk);
@@ -775,7 +784,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
775 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 784 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
776 test_bit(0, &nlk->state)) && 785 test_bit(0, &nlk->state)) &&
777 !sock_flag(sk, SOCK_DEAD)) 786 !sock_flag(sk, SOCK_DEAD))
778 timeo = schedule_timeout(timeo); 787 *timeo = schedule_timeout(*timeo);
779 788
780 __set_current_state(TASK_RUNNING); 789 __set_current_state(TASK_RUNNING);
781 remove_wait_queue(&nlk->wait, &wait); 790 remove_wait_queue(&nlk->wait, &wait);
@@ -783,7 +792,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
783 792
784 if (signal_pending(current)) { 793 if (signal_pending(current)) {
785 kfree_skb(skb); 794 kfree_skb(skb);
786 return sock_intr_errno(timeo); 795 return sock_intr_errno(*timeo);
787 } 796 }
788 return 1; 797 return 1;
789 } 798 }
@@ -877,7 +886,7 @@ retry:
877 if (netlink_is_kernel(sk)) 886 if (netlink_is_kernel(sk))
878 return netlink_unicast_kernel(sk, skb); 887 return netlink_unicast_kernel(sk, skb);
879 888
880 err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); 889 err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk);
881 if (err == 1) 890 if (err == 1)
882 goto retry; 891 goto retry;
883 if (err) 892 if (err)
@@ -885,6 +894,7 @@ retry:
885 894
886 return netlink_sendskb(sk, skb); 895 return netlink_sendskb(sk, skb);
887} 896}
897EXPORT_SYMBOL(netlink_unicast);
888 898
889int netlink_has_listeners(struct sock *sk, unsigned int group) 899int netlink_has_listeners(struct sock *sk, unsigned int group)
890{ 900{
@@ -905,7 +915,8 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
905} 915}
906EXPORT_SYMBOL_GPL(netlink_has_listeners); 916EXPORT_SYMBOL_GPL(netlink_has_listeners);
907 917
908static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 918static inline int netlink_broadcast_deliver(struct sock *sk,
919 struct sk_buff *skb)
909{ 920{
910 struct netlink_sock *nlk = nlk_sk(sk); 921 struct netlink_sock *nlk = nlk_sk(sk);
911 922
@@ -1026,6 +1037,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1026 return -ENOBUFS; 1037 return -ENOBUFS;
1027 return -ESRCH; 1038 return -ESRCH;
1028} 1039}
1040EXPORT_SYMBOL(netlink_broadcast);
1029 1041
1030struct netlink_set_err_data { 1042struct netlink_set_err_data {
1031 struct sock *exclude_sk; 1043 struct sock *exclude_sk;
@@ -1182,7 +1194,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1182 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1194 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1183 struct sock *sk = sock->sk; 1195 struct sock *sk = sock->sk;
1184 struct netlink_sock *nlk = nlk_sk(sk); 1196 struct netlink_sock *nlk = nlk_sk(sk);
1185 struct sockaddr_nl *addr=msg->msg_name; 1197 struct sockaddr_nl *addr = msg->msg_name;
1186 u32 dst_pid; 1198 u32 dst_pid;
1187 u32 dst_group; 1199 u32 dst_group;
1188 struct sk_buff *skb; 1200 struct sk_buff *skb;
@@ -1221,12 +1233,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1221 goto out; 1233 goto out;
1222 err = -ENOBUFS; 1234 err = -ENOBUFS;
1223 skb = alloc_skb(len, GFP_KERNEL); 1235 skb = alloc_skb(len, GFP_KERNEL);
1224 if (skb==NULL) 1236 if (skb == NULL)
1225 goto out; 1237 goto out;
1226 1238
1227 NETLINK_CB(skb).pid = nlk->pid; 1239 NETLINK_CB(skb).pid = nlk->pid;
1228 NETLINK_CB(skb).dst_group = dst_group; 1240 NETLINK_CB(skb).dst_group = dst_group;
1229 NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); 1241 NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
1230 selinux_get_task_sid(current, &(NETLINK_CB(skb).sid)); 1242 selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
1231 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1243 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1232 1244
@@ -1237,7 +1249,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1237 */ 1249 */
1238 1250
1239 err = -EFAULT; 1251 err = -EFAULT;
1240 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) { 1252 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1241 kfree_skb(skb); 1253 kfree_skb(skb);
1242 goto out; 1254 goto out;
1243 } 1255 }
@@ -1276,8 +1288,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1276 1288
1277 copied = 0; 1289 copied = 0;
1278 1290
1279 skb = skb_recv_datagram(sk,flags,noblock,&err); 1291 skb = skb_recv_datagram(sk, flags, noblock, &err);
1280 if (skb==NULL) 1292 if (skb == NULL)
1281 goto out; 1293 goto out;
1282 1294
1283 msg->msg_namelen = 0; 1295 msg->msg_namelen = 0;
@@ -1292,7 +1304,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1292 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1304 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1293 1305
1294 if (msg->msg_name) { 1306 if (msg->msg_name) {
1295 struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name; 1307 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1296 addr->nl_family = AF_NETLINK; 1308 addr->nl_family = AF_NETLINK;
1297 addr->nl_pad = 0; 1309 addr->nl_pad = 0;
1298 addr->nl_pid = NETLINK_CB(skb).pid; 1310 addr->nl_pid = NETLINK_CB(skb).pid;
@@ -1332,6 +1344,22 @@ static void netlink_data_ready(struct sock *sk, int len)
1332 * queueing. 1344 * queueing.
1333 */ 1345 */
1334 1346
1347static void __netlink_release(struct sock *sk)
1348{
1349 /*
1350 * Last sock_put should drop referrence to sk->sk_net. It has already
1351 * been dropped in netlink_kernel_create. Taking referrence to stopping
1352 * namespace is not an option.
1353 * Take referrence to a socket to remove it from netlink lookup table
1354 * _alive_ and after that destroy it in the context of init_net.
1355 */
1356
1357 sock_hold(sk);
1358 sock_release(sk->sk_socket);
1359 sk->sk_net = get_net(&init_net);
1360 sock_put(sk);
1361}
1362
1335struct sock * 1363struct sock *
1336netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1364netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1337 void (*input)(struct sk_buff *skb), 1365 void (*input)(struct sk_buff *skb),
@@ -1344,14 +1372,24 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1344 1372
1345 BUG_ON(!nl_table); 1373 BUG_ON(!nl_table);
1346 1374
1347 if (unit<0 || unit>=MAX_LINKS) 1375 if (unit < 0 || unit >= MAX_LINKS)
1348 return NULL; 1376 return NULL;
1349 1377
1350 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1378 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1351 return NULL; 1379 return NULL;
1352 1380
1353 if (__netlink_create(net, sock, cb_mutex, unit) < 0) 1381 /*
1354 goto out_sock_release; 1382 * We have to just have a reference on the net from sk, but don't
1383 * get_net it. Besides, we cannot get and then put the net here.
1384 * So we create one inside init_net and the move it to net.
1385 */
1386
1387 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1388 goto out_sock_release_nosk;
1389
1390 sk = sock->sk;
1391 put_net(sk->sk_net);
1392 sk->sk_net = net;
1355 1393
1356 if (groups < 32) 1394 if (groups < 32)
1357 groups = 32; 1395 groups = 32;
@@ -1360,7 +1398,6 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1360 if (!listeners) 1398 if (!listeners)
1361 goto out_sock_release; 1399 goto out_sock_release;
1362 1400
1363 sk = sock->sk;
1364 sk->sk_data_ready = netlink_data_ready; 1401 sk->sk_data_ready = netlink_data_ready;
1365 if (input) 1402 if (input)
1366 nlk_sk(sk)->netlink_rcv = input; 1403 nlk_sk(sk)->netlink_rcv = input;
@@ -1380,16 +1417,33 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1380 nl_table[unit].registered = 1; 1417 nl_table[unit].registered = 1;
1381 } else { 1418 } else {
1382 kfree(listeners); 1419 kfree(listeners);
1420 nl_table[unit].registered++;
1383 } 1421 }
1384 netlink_table_ungrab(); 1422 netlink_table_ungrab();
1385
1386 return sk; 1423 return sk;
1387 1424
1388out_sock_release: 1425out_sock_release:
1389 kfree(listeners); 1426 kfree(listeners);
1427 __netlink_release(sk);
1428 return NULL;
1429
1430out_sock_release_nosk:
1390 sock_release(sock); 1431 sock_release(sock);
1391 return NULL; 1432 return NULL;
1392} 1433}
1434EXPORT_SYMBOL(netlink_kernel_create);
1435
1436
1437void
1438netlink_kernel_release(struct sock *sk)
1439{
1440 if (sk == NULL || sk->sk_socket == NULL)
1441 return;
1442
1443 __netlink_release(sk);
1444}
1445EXPORT_SYMBOL(netlink_kernel_release);
1446
1393 1447
1394/** 1448/**
1395 * netlink_change_ngroups - change number of multicast groups 1449 * netlink_change_ngroups - change number of multicast groups
@@ -1461,6 +1515,7 @@ void netlink_set_nonroot(int protocol, unsigned int flags)
1461 if ((unsigned int)protocol < MAX_LINKS) 1515 if ((unsigned int)protocol < MAX_LINKS)
1462 nl_table[protocol].nl_nonroot = flags; 1516 nl_table[protocol].nl_nonroot = flags;
1463} 1517}
1518EXPORT_SYMBOL(netlink_set_nonroot);
1464 1519
1465static void netlink_destroy_callback(struct netlink_callback *cb) 1520static void netlink_destroy_callback(struct netlink_callback *cb)
1466{ 1521{
@@ -1529,8 +1584,9 @@ errout:
1529 1584
1530int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1585int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1531 struct nlmsghdr *nlh, 1586 struct nlmsghdr *nlh,
1532 int (*dump)(struct sk_buff *skb, struct netlink_callback*), 1587 int (*dump)(struct sk_buff *skb,
1533 int (*done)(struct netlink_callback*)) 1588 struct netlink_callback *),
1589 int (*done)(struct netlink_callback *))
1534{ 1590{
1535 struct netlink_callback *cb; 1591 struct netlink_callback *cb;
1536 struct sock *sk; 1592 struct sock *sk;
@@ -1571,6 +1627,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1571 */ 1627 */
1572 return -EINTR; 1628 return -EINTR;
1573} 1629}
1630EXPORT_SYMBOL(netlink_dump_start);
1574 1631
1575void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1632void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1576{ 1633{
@@ -1605,6 +1662,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1605 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1662 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1606 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1663 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1607} 1664}
1665EXPORT_SYMBOL(netlink_ack);
1608 1666
1609int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1667int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1610 struct nlmsghdr *)) 1668 struct nlmsghdr *))
@@ -1638,7 +1696,7 @@ ack:
1638 netlink_ack(skb, nlh, err); 1696 netlink_ack(skb, nlh, err);
1639 1697
1640skip: 1698skip:
1641 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1699 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1642 if (msglen > skb->len) 1700 if (msglen > skb->len)
1643 msglen = skb->len; 1701 msglen = skb->len;
1644 skb_pull(skb, msglen); 1702 skb_pull(skb, msglen);
@@ -1646,6 +1704,7 @@ skip:
1646 1704
1647 return 0; 1705 return 0;
1648} 1706}
1707EXPORT_SYMBOL(netlink_rcv_skb);
1649 1708
1650/** 1709/**
1651 * nlmsg_notify - send a notification netlink message 1710 * nlmsg_notify - send a notification netlink message
@@ -1678,10 +1737,11 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1678 1737
1679 return err; 1738 return err;
1680} 1739}
1740EXPORT_SYMBOL(nlmsg_notify);
1681 1741
1682#ifdef CONFIG_PROC_FS 1742#ifdef CONFIG_PROC_FS
1683struct nl_seq_iter { 1743struct nl_seq_iter {
1684 struct net *net; 1744 struct seq_net_private p;
1685 int link; 1745 int link;
1686 int hash_idx; 1746 int hash_idx;
1687}; 1747};
@@ -1694,12 +1754,12 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1694 struct hlist_node *node; 1754 struct hlist_node *node;
1695 loff_t off = 0; 1755 loff_t off = 0;
1696 1756
1697 for (i=0; i<MAX_LINKS; i++) { 1757 for (i = 0; i < MAX_LINKS; i++) {
1698 struct nl_pid_hash *hash = &nl_table[i].hash; 1758 struct nl_pid_hash *hash = &nl_table[i].hash;
1699 1759
1700 for (j = 0; j <= hash->mask; j++) { 1760 for (j = 0; j <= hash->mask; j++) {
1701 sk_for_each(s, node, &hash->table[j]) { 1761 sk_for_each(s, node, &hash->table[j]) {
1702 if (iter->net != s->sk_net) 1762 if (iter->p.net != s->sk_net)
1703 continue; 1763 continue;
1704 if (off == pos) { 1764 if (off == pos) {
1705 iter->link = i; 1765 iter->link = i;
@@ -1714,6 +1774,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1714} 1774}
1715 1775
1716static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 1776static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1777 __acquires(nl_table_lock)
1717{ 1778{
1718 read_lock(&nl_table_lock); 1779 read_lock(&nl_table_lock);
1719 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1780 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -1734,7 +1795,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1734 s = v; 1795 s = v;
1735 do { 1796 do {
1736 s = sk_next(s); 1797 s = sk_next(s);
1737 } while (s && (iter->net != s->sk_net)); 1798 } while (s && (iter->p.net != s->sk_net));
1738 if (s) 1799 if (s)
1739 return s; 1800 return s;
1740 1801
@@ -1746,7 +1807,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1746 1807
1747 for (; j <= hash->mask; j++) { 1808 for (; j <= hash->mask; j++) {
1748 s = sk_head(&hash->table[j]); 1809 s = sk_head(&hash->table[j]);
1749 while (s && (iter->net != s->sk_net)) 1810 while (s && (iter->p.net != s->sk_net))
1750 s = sk_next(s); 1811 s = sk_next(s);
1751 if (s) { 1812 if (s) {
1752 iter->link = i; 1813 iter->link = i;
@@ -1762,6 +1823,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1762} 1823}
1763 1824
1764static void netlink_seq_stop(struct seq_file *seq, void *v) 1825static void netlink_seq_stop(struct seq_file *seq, void *v)
1826 __releases(nl_table_lock)
1765{ 1827{
1766 read_unlock(&nl_table_lock); 1828 read_unlock(&nl_table_lock);
1767} 1829}
@@ -1802,27 +1864,8 @@ static const struct seq_operations netlink_seq_ops = {
1802 1864
1803static int netlink_seq_open(struct inode *inode, struct file *file) 1865static int netlink_seq_open(struct inode *inode, struct file *file)
1804{ 1866{
1805 struct nl_seq_iter *iter; 1867 return seq_open_net(inode, file, &netlink_seq_ops,
1806 1868 sizeof(struct nl_seq_iter));
1807 iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter));
1808 if (!iter)
1809 return -ENOMEM;
1810
1811 iter->net = get_proc_net(inode);
1812 if (!iter->net) {
1813 seq_release_private(inode, file);
1814 return -ENXIO;
1815 }
1816
1817 return 0;
1818}
1819
1820static int netlink_seq_release(struct inode *inode, struct file *file)
1821{
1822 struct seq_file *seq = file->private_data;
1823 struct nl_seq_iter *iter = seq->private;
1824 put_net(iter->net);
1825 return seq_release_private(inode, file);
1826} 1869}
1827 1870
1828static const struct file_operations netlink_seq_fops = { 1871static const struct file_operations netlink_seq_fops = {
@@ -1830,7 +1873,7 @@ static const struct file_operations netlink_seq_fops = {
1830 .open = netlink_seq_open, 1873 .open = netlink_seq_open,
1831 .read = seq_read, 1874 .read = seq_read,
1832 .llseek = seq_lseek, 1875 .llseek = seq_lseek,
1833 .release = netlink_seq_release, 1876 .release = seq_release_net,
1834}; 1877};
1835 1878
1836#endif 1879#endif
@@ -1839,11 +1882,13 @@ int netlink_register_notifier(struct notifier_block *nb)
1839{ 1882{
1840 return atomic_notifier_chain_register(&netlink_chain, nb); 1883 return atomic_notifier_chain_register(&netlink_chain, nb);
1841} 1884}
1885EXPORT_SYMBOL(netlink_register_notifier);
1842 1886
1843int netlink_unregister_notifier(struct notifier_block *nb) 1887int netlink_unregister_notifier(struct notifier_block *nb)
1844{ 1888{
1845 return atomic_notifier_chain_unregister(&netlink_chain, nb); 1889 return atomic_notifier_chain_unregister(&netlink_chain, nb);
1846} 1890}
1891EXPORT_SYMBOL(netlink_unregister_notifier);
1847 1892
1848static const struct proto_ops netlink_ops = { 1893static const struct proto_ops netlink_ops = {
1849 .family = PF_NETLINK, 1894 .family = PF_NETLINK,
@@ -1888,7 +1933,7 @@ static void __net_exit netlink_net_exit(struct net *net)
1888#endif 1933#endif
1889} 1934}
1890 1935
1891static struct pernet_operations netlink_net_ops = { 1936static struct pernet_operations __net_initdata netlink_net_ops = {
1892 .init = netlink_net_init, 1937 .init = netlink_net_init,
1893 .exit = netlink_net_exit, 1938 .exit = netlink_net_exit,
1894}; 1939};
@@ -1922,7 +1967,7 @@ static int __init netlink_proto_init(void)
1922 for (i = 0; i < MAX_LINKS; i++) { 1967 for (i = 0; i < MAX_LINKS; i++) {
1923 struct nl_pid_hash *hash = &nl_table[i].hash; 1968 struct nl_pid_hash *hash = &nl_table[i].hash;
1924 1969
1925 hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); 1970 hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
1926 if (!hash->table) { 1971 if (!hash->table) {
1927 while (i-- > 0) 1972 while (i-- > 0)
1928 nl_pid_hash_free(nl_table[i].hash.table, 1973 nl_pid_hash_free(nl_table[i].hash.table,
@@ -1930,7 +1975,6 @@ static int __init netlink_proto_init(void)
1930 kfree(nl_table); 1975 kfree(nl_table);
1931 goto panic; 1976 goto panic;
1932 } 1977 }
1933 memset(hash->table, 0, 1 * sizeof(*hash->table));
1934 hash->max_shift = order; 1978 hash->max_shift = order;
1935 hash->shift = 0; 1979 hash->shift = 0;
1936 hash->mask = 0; 1980 hash->mask = 0;
@@ -1948,14 +1992,3 @@ panic:
1948} 1992}
1949 1993
1950core_initcall(netlink_proto_init); 1994core_initcall(netlink_proto_init);
1951
1952EXPORT_SYMBOL(netlink_ack);
1953EXPORT_SYMBOL(netlink_rcv_skb);
1954EXPORT_SYMBOL(netlink_broadcast);
1955EXPORT_SYMBOL(netlink_dump_start);
1956EXPORT_SYMBOL(netlink_kernel_create);
1957EXPORT_SYMBOL(netlink_register_notifier);
1958EXPORT_SYMBOL(netlink_set_nonroot);
1959EXPORT_SYMBOL(netlink_unicast);
1960EXPORT_SYMBOL(netlink_unregister_notifier);
1961EXPORT_SYMBOL(nlmsg_notify);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index ec39d12c2423..feb326f4a752 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -430,6 +430,24 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
430 return 0; 430 return 0;
431} 431}
432 432
433/**
434 * nla_append - Add a netlink attribute without header or padding
435 * @skb: socket buffer to add attribute to
436 * @attrlen: length of attribute payload
437 * @data: head of attribute payload
438 *
439 * Returns -1 if the tailroom of the skb is insufficient to store
440 * the attribute payload.
441 */
442int nla_append(struct sk_buff *skb, int attrlen, const void *data)
443{
444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
445 return -1;
446
447 memcpy(skb_put(skb, attrlen), data, attrlen);
448 return 0;
449}
450
433EXPORT_SYMBOL(nla_validate); 451EXPORT_SYMBOL(nla_validate);
434EXPORT_SYMBOL(nla_parse); 452EXPORT_SYMBOL(nla_parse);
435EXPORT_SYMBOL(nla_find); 453EXPORT_SYMBOL(nla_find);
@@ -445,3 +463,4 @@ EXPORT_SYMBOL(nla_put_nohdr);
445EXPORT_SYMBOL(nla_memcpy); 463EXPORT_SYMBOL(nla_memcpy);
446EXPORT_SYMBOL(nla_memcmp); 464EXPORT_SYMBOL(nla_memcmp);
447EXPORT_SYMBOL(nla_strcmp); 465EXPORT_SYMBOL(nla_strcmp);
466EXPORT_SYMBOL(nla_append);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 3a4d479ea64e..972250c974f1 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -423,7 +423,8 @@ static int nr_create(struct net *net, struct socket *sock, int protocol)
423 if (sock->type != SOCK_SEQPACKET || protocol != 0) 423 if (sock->type != SOCK_SEQPACKET || protocol != 0)
424 return -ESOCKTNOSUPPORT; 424 return -ESOCKTNOSUPPORT;
425 425
426 if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) 426 sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto);
427 if (sk == NULL)
427 return -ENOMEM; 428 return -ENOMEM;
428 429
429 nr = nr_sk(sk); 430 nr = nr_sk(sk);
@@ -465,7 +466,8 @@ static struct sock *nr_make_new(struct sock *osk)
465 if (osk->sk_type != SOCK_SEQPACKET) 466 if (osk->sk_type != SOCK_SEQPACKET)
466 return NULL; 467 return NULL;
467 468
468 if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) 469 sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot);
470 if (sk == NULL)
469 return NULL; 471 return NULL;
470 472
471 nr = nr_sk(sk); 473 nr = nr_sk(sk);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 8c68da5ef0a1..6caf459665f2 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,7 +56,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
56 56
57 /* Spoof incoming device */ 57 /* Spoof incoming device */
58 skb->dev = dev; 58 skb->dev = dev;
59 skb_reset_mac_header(skb); 59 skb->mac_header = skb->network_header;
60 skb_reset_network_header(skb); 60 skb_reset_network_header(skb);
61 skb->pkt_type = PACKET_HOST; 61 skb->pkt_type = PACKET_HOST;
62 62
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 6cfaad952c6c..1cb98e88f5e1 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -40,21 +40,10 @@ void nr_init_timers(struct sock *sk)
40{ 40{
41 struct nr_sock *nr = nr_sk(sk); 41 struct nr_sock *nr = nr_sk(sk);
42 42
43 init_timer(&nr->t1timer); 43 setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
44 nr->t1timer.data = (unsigned long)sk; 44 setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
45 nr->t1timer.function = &nr_t1timer_expiry; 45 setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
46 46 setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
47 init_timer(&nr->t2timer);
48 nr->t2timer.data = (unsigned long)sk;
49 nr->t2timer.function = &nr_t2timer_expiry;
50
51 init_timer(&nr->t4timer);
52 nr->t4timer.data = (unsigned long)sk;
53 nr->t4timer.function = &nr_t4timer_expiry;
54
55 init_timer(&nr->idletimer);
56 nr->idletimer.data = (unsigned long)sk;
57 nr->idletimer.function = &nr_idletimer_expiry;
58 47
59 /* initialized by sock_init_data */ 48 /* initialized by sock_init_data */
60 sk->sk_timer.data = (unsigned long)sk; 49 sk->sk_timer.data = (unsigned long)sk;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 2ea68da01fb8..34c96c9674df 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -170,29 +170,15 @@ static ctl_table nr_table[] = {
170 { .ctl_name = 0 } 170 { .ctl_name = 0 }
171}; 171};
172 172
173static ctl_table nr_dir_table[] = { 173static struct ctl_path nr_path[] = {
174 { 174 { .procname = "net", .ctl_name = CTL_NET, },
175 .ctl_name = NET_NETROM, 175 { .procname = "netrom", .ctl_name = NET_NETROM, },
176 .procname = "netrom", 176 { }
177 .mode = 0555,
178 .child = nr_table
179 },
180 { .ctl_name = 0 }
181};
182
183static ctl_table nr_root_table[] = {
184 {
185 .ctl_name = CTL_NET,
186 .procname = "net",
187 .mode = 0555,
188 .child = nr_dir_table
189 },
190 { .ctl_name = 0 }
191}; 177};
192 178
193void __init nr_register_sysctl(void) 179void __init nr_register_sysctl(void)
194{ 180{
195 nr_table_header = register_sysctl_table(nr_root_table); 181 nr_table_header = register_sysctl_paths(nr_path, nr_table);
196} 182}
197 183
198void nr_unregister_sysctl(void) 184void nr_unregister_sysctl(void)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d0936506b731..b8b827c7062d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -135,13 +135,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
135 packet classifier depends on it. 135 packet classifier depends on it.
136 */ 136 */
137 137
138/* List of all packet sockets. */
139static HLIST_HEAD(packet_sklist);
140static DEFINE_RWLOCK(packet_sklist_lock);
141
142static atomic_t packet_socks_nr;
143
144
145/* Private packet socket structures. */ 138/* Private packet socket structures. */
146 139
147struct packet_mclist 140struct packet_mclist
@@ -236,10 +229,7 @@ static void packet_sock_destruct(struct sock *sk)
236 return; 229 return;
237 } 230 }
238 231
239 atomic_dec(&packet_socks_nr); 232 sk_refcnt_debug_dec(sk);
240#ifdef PACKET_REFCNT_DEBUG
241 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
242#endif
243} 233}
244 234
245 235
@@ -252,9 +242,6 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
252 struct sock *sk; 242 struct sock *sk;
253 struct sockaddr_pkt *spkt; 243 struct sockaddr_pkt *spkt;
254 244
255 if (dev->nd_net != &init_net)
256 goto out;
257
258 /* 245 /*
259 * When we registered the protocol we saved the socket in the data 246 * When we registered the protocol we saved the socket in the data
260 * field for just this event. 247 * field for just this event.
@@ -276,6 +263,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
276 if (skb->pkt_type == PACKET_LOOPBACK) 263 if (skb->pkt_type == PACKET_LOOPBACK)
277 goto out; 264 goto out;
278 265
266 if (dev->nd_net != sk->sk_net)
267 goto out;
268
279 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 269 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
280 goto oom; 270 goto oom;
281 271
@@ -347,7 +337,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
347 */ 337 */
348 338
349 saddr->spkt_device[13] = 0; 339 saddr->spkt_device[13] = 0;
350 dev = dev_get_by_name(&init_net, saddr->spkt_device); 340 dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
351 err = -ENODEV; 341 err = -ENODEV;
352 if (dev == NULL) 342 if (dev == NULL)
353 goto out_unlock; 343 goto out_unlock;
@@ -455,15 +445,15 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
455 int skb_len = skb->len; 445 int skb_len = skb->len;
456 unsigned int snaplen, res; 446 unsigned int snaplen, res;
457 447
458 if (dev->nd_net != &init_net)
459 goto drop;
460
461 if (skb->pkt_type == PACKET_LOOPBACK) 448 if (skb->pkt_type == PACKET_LOOPBACK)
462 goto drop; 449 goto drop;
463 450
464 sk = pt->af_packet_priv; 451 sk = pt->af_packet_priv;
465 po = pkt_sk(sk); 452 po = pkt_sk(sk);
466 453
454 if (dev->nd_net != sk->sk_net)
455 goto drop;
456
467 skb->dev = dev; 457 skb->dev = dev;
468 458
469 if (dev->header_ops) { 459 if (dev->header_ops) {
@@ -515,7 +505,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
515 sll->sll_hatype = dev->type; 505 sll->sll_hatype = dev->type;
516 sll->sll_protocol = skb->protocol; 506 sll->sll_protocol = skb->protocol;
517 sll->sll_pkttype = skb->pkt_type; 507 sll->sll_pkttype = skb->pkt_type;
518 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) 508 if (unlikely(po->origdev))
519 sll->sll_ifindex = orig_dev->ifindex; 509 sll->sll_ifindex = orig_dev->ifindex;
520 else 510 else
521 sll->sll_ifindex = dev->ifindex; 511 sll->sll_ifindex = dev->ifindex;
@@ -572,15 +562,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
572 struct sk_buff *copy_skb = NULL; 562 struct sk_buff *copy_skb = NULL;
573 struct timeval tv; 563 struct timeval tv;
574 564
575 if (dev->nd_net != &init_net)
576 goto drop;
577
578 if (skb->pkt_type == PACKET_LOOPBACK) 565 if (skb->pkt_type == PACKET_LOOPBACK)
579 goto drop; 566 goto drop;
580 567
581 sk = pt->af_packet_priv; 568 sk = pt->af_packet_priv;
582 po = pkt_sk(sk); 569 po = pkt_sk(sk);
583 570
571 if (dev->nd_net != sk->sk_net)
572 goto drop;
573
584 if (dev->header_ops) { 574 if (dev->header_ops) {
585 if (sk->sk_type != SOCK_DGRAM) 575 if (sk->sk_type != SOCK_DGRAM)
586 skb_push(skb, skb->data - skb_mac_header(skb)); 576 skb_push(skb, skb->data - skb_mac_header(skb));
@@ -661,7 +651,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
661 sll->sll_hatype = dev->type; 651 sll->sll_hatype = dev->type;
662 sll->sll_protocol = skb->protocol; 652 sll->sll_protocol = skb->protocol;
663 sll->sll_pkttype = skb->pkt_type; 653 sll->sll_pkttype = skb->pkt_type;
664 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) 654 if (unlikely(po->origdev))
665 sll->sll_ifindex = orig_dev->ifindex; 655 sll->sll_ifindex = orig_dev->ifindex;
666 else 656 else
667 sll->sll_ifindex = dev->ifindex; 657 sll->sll_ifindex = dev->ifindex;
@@ -738,7 +728,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
738 } 728 }
739 729
740 730
741 dev = dev_get_by_index(&init_net, ifindex); 731 dev = dev_get_by_index(sk->sk_net, ifindex);
742 err = -ENXIO; 732 err = -ENXIO;
743 if (dev == NULL) 733 if (dev == NULL)
744 goto out_unlock; 734 goto out_unlock;
@@ -805,15 +795,17 @@ static int packet_release(struct socket *sock)
805{ 795{
806 struct sock *sk = sock->sk; 796 struct sock *sk = sock->sk;
807 struct packet_sock *po; 797 struct packet_sock *po;
798 struct net *net;
808 799
809 if (!sk) 800 if (!sk)
810 return 0; 801 return 0;
811 802
803 net = sk->sk_net;
812 po = pkt_sk(sk); 804 po = pkt_sk(sk);
813 805
814 write_lock_bh(&packet_sklist_lock); 806 write_lock_bh(&net->packet.sklist_lock);
815 sk_del_node_init(sk); 807 sk_del_node_init(sk);
816 write_unlock_bh(&packet_sklist_lock); 808 write_unlock_bh(&net->packet.sklist_lock);
817 809
818 /* 810 /*
819 * Unhook packet receive handler. 811 * Unhook packet receive handler.
@@ -849,6 +841,7 @@ static int packet_release(struct socket *sock)
849 /* Purge queues */ 841 /* Purge queues */
850 842
851 skb_queue_purge(&sk->sk_receive_queue); 843 skb_queue_purge(&sk->sk_receive_queue);
844 sk_refcnt_debug_release(sk);
852 845
853 sock_put(sk); 846 sock_put(sk);
854 return 0; 847 return 0;
@@ -886,20 +879,14 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
886 if (protocol == 0) 879 if (protocol == 0)
887 goto out_unlock; 880 goto out_unlock;
888 881
889 if (dev) { 882 if (!dev || (dev->flags & IFF_UP)) {
890 if (dev->flags&IFF_UP) {
891 dev_add_pack(&po->prot_hook);
892 sock_hold(sk);
893 po->running = 1;
894 } else {
895 sk->sk_err = ENETDOWN;
896 if (!sock_flag(sk, SOCK_DEAD))
897 sk->sk_error_report(sk);
898 }
899 } else {
900 dev_add_pack(&po->prot_hook); 883 dev_add_pack(&po->prot_hook);
901 sock_hold(sk); 884 sock_hold(sk);
902 po->running = 1; 885 po->running = 1;
886 } else {
887 sk->sk_err = ENETDOWN;
888 if (!sock_flag(sk, SOCK_DEAD))
889 sk->sk_error_report(sk);
903 } 890 }
904 891
905out_unlock: 892out_unlock:
@@ -927,7 +914,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add
927 return -EINVAL; 914 return -EINVAL;
928 strlcpy(name,uaddr->sa_data,sizeof(name)); 915 strlcpy(name,uaddr->sa_data,sizeof(name));
929 916
930 dev = dev_get_by_name(&init_net, name); 917 dev = dev_get_by_name(sk->sk_net, name);
931 if (dev) { 918 if (dev) {
932 err = packet_do_bind(sk, dev, pkt_sk(sk)->num); 919 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
933 dev_put(dev); 920 dev_put(dev);
@@ -954,7 +941,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
954 941
955 if (sll->sll_ifindex) { 942 if (sll->sll_ifindex) {
956 err = -ENODEV; 943 err = -ENODEV;
957 dev = dev_get_by_index(&init_net, sll->sll_ifindex); 944 dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
958 if (dev == NULL) 945 if (dev == NULL)
959 goto out; 946 goto out;
960 } 947 }
@@ -983,9 +970,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
983 __be16 proto = (__force __be16)protocol; /* weird, but documented */ 970 __be16 proto = (__force __be16)protocol; /* weird, but documented */
984 int err; 971 int err;
985 972
986 if (net != &init_net)
987 return -EAFNOSUPPORT;
988
989 if (!capable(CAP_NET_RAW)) 973 if (!capable(CAP_NET_RAW))
990 return -EPERM; 974 return -EPERM;
991 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && 975 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
@@ -995,7 +979,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
995 sock->state = SS_UNCONNECTED; 979 sock->state = SS_UNCONNECTED;
996 980
997 err = -ENOBUFS; 981 err = -ENOBUFS;
998 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); 982 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
999 if (sk == NULL) 983 if (sk == NULL)
1000 goto out; 984 goto out;
1001 985
@@ -1010,7 +994,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
1010 po->num = proto; 994 po->num = proto;
1011 995
1012 sk->sk_destruct = packet_sock_destruct; 996 sk->sk_destruct = packet_sock_destruct;
1013 atomic_inc(&packet_socks_nr); 997 sk_refcnt_debug_inc(sk);
1014 998
1015 /* 999 /*
1016 * Attach a protocol block 1000 * Attach a protocol block
@@ -1031,9 +1015,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
1031 po->running = 1; 1015 po->running = 1;
1032 } 1016 }
1033 1017
1034 write_lock_bh(&packet_sklist_lock); 1018 write_lock_bh(&net->packet.sklist_lock);
1035 sk_add_node(sk, &packet_sklist); 1019 sk_add_node(sk, &net->packet.sklist);
1036 write_unlock_bh(&packet_sklist_lock); 1020 write_unlock_bh(&net->packet.sklist_lock);
1037 return(0); 1021 return(0);
1038out: 1022out:
1039 return err; 1023 return err;
@@ -1151,7 +1135,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1151 return -EOPNOTSUPP; 1135 return -EOPNOTSUPP;
1152 1136
1153 uaddr->sa_family = AF_PACKET; 1137 uaddr->sa_family = AF_PACKET;
1154 dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex); 1138 dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
1155 if (dev) { 1139 if (dev) {
1156 strlcpy(uaddr->sa_data, dev->name, 15); 1140 strlcpy(uaddr->sa_data, dev->name, 15);
1157 dev_put(dev); 1141 dev_put(dev);
@@ -1176,7 +1160,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1176 sll->sll_family = AF_PACKET; 1160 sll->sll_family = AF_PACKET;
1177 sll->sll_ifindex = po->ifindex; 1161 sll->sll_ifindex = po->ifindex;
1178 sll->sll_protocol = po->num; 1162 sll->sll_protocol = po->num;
1179 dev = dev_get_by_index(&init_net, po->ifindex); 1163 dev = dev_get_by_index(sk->sk_net, po->ifindex);
1180 if (dev) { 1164 if (dev) {
1181 sll->sll_hatype = dev->type; 1165 sll->sll_hatype = dev->type;
1182 sll->sll_halen = dev->addr_len; 1166 sll->sll_halen = dev->addr_len;
@@ -1228,7 +1212,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1228 rtnl_lock(); 1212 rtnl_lock();
1229 1213
1230 err = -ENODEV; 1214 err = -ENODEV;
1231 dev = __dev_get_by_index(&init_net, mreq->mr_ifindex); 1215 dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
1232 if (!dev) 1216 if (!dev)
1233 goto done; 1217 goto done;
1234 1218
@@ -1282,7 +1266,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1282 if (--ml->count == 0) { 1266 if (--ml->count == 0) {
1283 struct net_device *dev; 1267 struct net_device *dev;
1284 *mlp = ml->next; 1268 *mlp = ml->next;
1285 dev = dev_get_by_index(&init_net, ml->ifindex); 1269 dev = dev_get_by_index(sk->sk_net, ml->ifindex);
1286 if (dev) { 1270 if (dev) {
1287 packet_dev_mc(dev, ml, -1); 1271 packet_dev_mc(dev, ml, -1);
1288 dev_put(dev); 1272 dev_put(dev);
@@ -1310,7 +1294,7 @@ static void packet_flush_mclist(struct sock *sk)
1310 struct net_device *dev; 1294 struct net_device *dev;
1311 1295
1312 po->mclist = ml->next; 1296 po->mclist = ml->next;
1313 if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) { 1297 if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
1314 packet_dev_mc(dev, ml, -1); 1298 packet_dev_mc(dev, ml, -1);
1315 dev_put(dev); 1299 dev_put(dev);
1316 } 1300 }
@@ -1466,12 +1450,10 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1466 struct sock *sk; 1450 struct sock *sk;
1467 struct hlist_node *node; 1451 struct hlist_node *node;
1468 struct net_device *dev = data; 1452 struct net_device *dev = data;
1453 struct net *net = dev->nd_net;
1469 1454
1470 if (dev->nd_net != &init_net) 1455 read_lock(&net->packet.sklist_lock);
1471 return NOTIFY_DONE; 1456 sk_for_each(sk, node, &net->packet.sklist) {
1472
1473 read_lock(&packet_sklist_lock);
1474 sk_for_each(sk, node, &packet_sklist) {
1475 struct packet_sock *po = pkt_sk(sk); 1457 struct packet_sock *po = pkt_sk(sk);
1476 1458
1477 switch (msg) { 1459 switch (msg) {
@@ -1510,7 +1492,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1510 break; 1492 break;
1511 } 1493 }
1512 } 1494 }
1513 read_unlock(&packet_sklist_lock); 1495 read_unlock(&net->packet.sklist_lock);
1514 return NOTIFY_DONE; 1496 return NOTIFY_DONE;
1515} 1497}
1516 1498
@@ -1558,6 +1540,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
1558 case SIOCGIFDSTADDR: 1540 case SIOCGIFDSTADDR:
1559 case SIOCSIFDSTADDR: 1541 case SIOCSIFDSTADDR:
1560 case SIOCSIFFLAGS: 1542 case SIOCSIFFLAGS:
1543 if (sk->sk_net != &init_net)
1544 return -ENOIOCTLCMD;
1561 return inet_dgram_ops.ioctl(sock, cmd, arg); 1545 return inet_dgram_ops.ioctl(sock, cmd, arg);
1562#endif 1546#endif
1563 1547
@@ -1873,12 +1857,12 @@ static struct notifier_block packet_netdev_notifier = {
1873}; 1857};
1874 1858
1875#ifdef CONFIG_PROC_FS 1859#ifdef CONFIG_PROC_FS
1876static inline struct sock *packet_seq_idx(loff_t off) 1860static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
1877{ 1861{
1878 struct sock *s; 1862 struct sock *s;
1879 struct hlist_node *node; 1863 struct hlist_node *node;
1880 1864
1881 sk_for_each(s, node, &packet_sklist) { 1865 sk_for_each(s, node, &net->packet.sklist) {
1882 if (!off--) 1866 if (!off--)
1883 return s; 1867 return s;
1884 } 1868 }
@@ -1886,22 +1870,27 @@ static inline struct sock *packet_seq_idx(loff_t off)
1886} 1870}
1887 1871
1888static void *packet_seq_start(struct seq_file *seq, loff_t *pos) 1872static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1873 __acquires(seq_file_net(seq)->packet.sklist_lock)
1889{ 1874{
1890 read_lock(&packet_sklist_lock); 1875 struct net *net = seq_file_net(seq);
1891 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; 1876 read_lock(&net->packet.sklist_lock);
1877 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
1892} 1878}
1893 1879
1894static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1880static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1895{ 1881{
1882 struct net *net = seq_file_net(seq);
1896 ++*pos; 1883 ++*pos;
1897 return (v == SEQ_START_TOKEN) 1884 return (v == SEQ_START_TOKEN)
1898 ? sk_head(&packet_sklist) 1885 ? sk_head(&net->packet.sklist)
1899 : sk_next((struct sock*)v) ; 1886 : sk_next((struct sock*)v) ;
1900} 1887}
1901 1888
1902static void packet_seq_stop(struct seq_file *seq, void *v) 1889static void packet_seq_stop(struct seq_file *seq, void *v)
1890 __releases(seq_file_net(seq)->packet.sklist_lock)
1903{ 1891{
1904 read_unlock(&packet_sklist_lock); 1892 struct net *net = seq_file_net(seq);
1893 read_unlock(&net->packet.sklist_lock);
1905} 1894}
1906 1895
1907static int packet_seq_show(struct seq_file *seq, void *v) 1896static int packet_seq_show(struct seq_file *seq, void *v)
@@ -1937,7 +1926,8 @@ static const struct seq_operations packet_seq_ops = {
1937 1926
1938static int packet_seq_open(struct inode *inode, struct file *file) 1927static int packet_seq_open(struct inode *inode, struct file *file)
1939{ 1928{
1940 return seq_open(file, &packet_seq_ops); 1929 return seq_open_net(inode, file, &packet_seq_ops,
1930 sizeof(struct seq_net_private));
1941} 1931}
1942 1932
1943static const struct file_operations packet_seq_fops = { 1933static const struct file_operations packet_seq_fops = {
@@ -1945,15 +1935,37 @@ static const struct file_operations packet_seq_fops = {
1945 .open = packet_seq_open, 1935 .open = packet_seq_open,
1946 .read = seq_read, 1936 .read = seq_read,
1947 .llseek = seq_lseek, 1937 .llseek = seq_lseek,
1948 .release = seq_release, 1938 .release = seq_release_net,
1949}; 1939};
1950 1940
1951#endif 1941#endif
1952 1942
1943static int packet_net_init(struct net *net)
1944{
1945 rwlock_init(&net->packet.sklist_lock);
1946 INIT_HLIST_HEAD(&net->packet.sklist);
1947
1948 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
1949 return -ENOMEM;
1950
1951 return 0;
1952}
1953
1954static void packet_net_exit(struct net *net)
1955{
1956 proc_net_remove(net, "packet");
1957}
1958
1959static struct pernet_operations packet_net_ops = {
1960 .init = packet_net_init,
1961 .exit = packet_net_exit,
1962};
1963
1964
1953static void __exit packet_exit(void) 1965static void __exit packet_exit(void)
1954{ 1966{
1955 proc_net_remove(&init_net, "packet");
1956 unregister_netdevice_notifier(&packet_netdev_notifier); 1967 unregister_netdevice_notifier(&packet_netdev_notifier);
1968 unregister_pernet_subsys(&packet_net_ops);
1957 sock_unregister(PF_PACKET); 1969 sock_unregister(PF_PACKET);
1958 proto_unregister(&packet_proto); 1970 proto_unregister(&packet_proto);
1959} 1971}
@@ -1966,8 +1978,8 @@ static int __init packet_init(void)
1966 goto out; 1978 goto out;
1967 1979
1968 sock_register(&packet_family_ops); 1980 sock_register(&packet_family_ops);
1981 register_pernet_subsys(&packet_net_ops);
1969 register_netdevice_notifier(&packet_netdev_notifier); 1982 register_netdevice_notifier(&packet_netdev_notifier);
1970 proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops);
1971out: 1983out:
1972 return rc; 1984 return rc;
1973} 1985}
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index d1e9d68f8ba0..e4b051dbed61 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -84,6 +84,7 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
84static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); 84static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
85static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); 85static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH);
86static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); 86static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB);
87static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX);
87 88
88static void rfkill_event(struct input_handle *handle, unsigned int type, 89static void rfkill_event(struct input_handle *handle, unsigned int type,
89 unsigned int code, int down) 90 unsigned int code, int down)
@@ -99,6 +100,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
99 case KEY_UWB: 100 case KEY_UWB:
100 rfkill_schedule_toggle(&rfkill_uwb); 101 rfkill_schedule_toggle(&rfkill_uwb);
101 break; 102 break;
103 case KEY_WIMAX:
104 rfkill_schedule_toggle(&rfkill_wimax);
105 break;
102 default: 106 default:
103 break; 107 break;
104 } 108 }
@@ -159,6 +163,11 @@ static const struct input_device_id rfkill_ids[] = {
159 .evbit = { BIT_MASK(EV_KEY) }, 163 .evbit = { BIT_MASK(EV_KEY) },
160 .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) }, 164 .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
161 }, 165 },
166 {
167 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
168 .evbit = { BIT_MASK(EV_KEY) },
169 .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
170 },
162 { } 171 { }
163}; 172};
164 173
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 51d151c0e962..6562f868e82f 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -27,6 +27,10 @@
27#include <linux/mutex.h> 27#include <linux/mutex.h>
28#include <linux/rfkill.h> 28#include <linux/rfkill.h>
29 29
30/* Get declaration of rfkill_switch_all() to shut up sparse. */
31#include "rfkill-input.h"
32
33
30MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>"); 34MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>");
31MODULE_VERSION("1.0"); 35MODULE_VERSION("1.0");
32MODULE_DESCRIPTION("RF switch support"); 36MODULE_DESCRIPTION("RF switch support");
@@ -56,11 +60,7 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
56static int rfkill_toggle_radio(struct rfkill *rfkill, 60static int rfkill_toggle_radio(struct rfkill *rfkill,
57 enum rfkill_state state) 61 enum rfkill_state state)
58{ 62{
59 int retval; 63 int retval = 0;
60
61 retval = mutex_lock_interruptible(&rfkill->mutex);
62 if (retval)
63 return retval;
64 64
65 if (state != rfkill->state) { 65 if (state != rfkill->state) {
66 retval = rfkill->toggle_radio(rfkill->data, state); 66 retval = rfkill->toggle_radio(rfkill->data, state);
@@ -70,7 +70,6 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
70 } 70 }
71 } 71 }
72 72
73 mutex_unlock(&rfkill->mutex);
74 return retval; 73 return retval;
75} 74}
76 75
@@ -127,6 +126,9 @@ static ssize_t rfkill_type_show(struct device *dev,
127 case RFKILL_TYPE_UWB: 126 case RFKILL_TYPE_UWB:
128 type = "ultrawideband"; 127 type = "ultrawideband";
129 break; 128 break;
129 case RFKILL_TYPE_WIMAX:
130 type = "wimax";
131 break;
130 default: 132 default:
131 BUG(); 133 BUG();
132 } 134 }
@@ -154,12 +156,13 @@ static ssize_t rfkill_state_store(struct device *dev,
154 if (!capable(CAP_NET_ADMIN)) 156 if (!capable(CAP_NET_ADMIN))
155 return -EPERM; 157 return -EPERM;
156 158
159 if (mutex_lock_interruptible(&rfkill->mutex))
160 return -ERESTARTSYS;
157 error = rfkill_toggle_radio(rfkill, 161 error = rfkill_toggle_radio(rfkill,
158 state ? RFKILL_STATE_ON : RFKILL_STATE_OFF); 162 state ? RFKILL_STATE_ON : RFKILL_STATE_OFF);
159 if (error) 163 mutex_unlock(&rfkill->mutex);
160 return error;
161 164
162 return count; 165 return error ? error : count;
163} 166}
164 167
165static ssize_t rfkill_claim_show(struct device *dev, 168static ssize_t rfkill_claim_show(struct device *dev,
@@ -276,21 +279,17 @@ static struct class rfkill_class = {
276 279
277static int rfkill_add_switch(struct rfkill *rfkill) 280static int rfkill_add_switch(struct rfkill *rfkill)
278{ 281{
279 int retval; 282 int error;
280
281 retval = mutex_lock_interruptible(&rfkill_mutex);
282 if (retval)
283 return retval;
284 283
285 retval = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]); 284 mutex_lock(&rfkill_mutex);
286 if (retval)
287 goto out;
288 285
289 list_add_tail(&rfkill->node, &rfkill_list); 286 error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]);
287 if (!error)
288 list_add_tail(&rfkill->node, &rfkill_list);
290 289
291 out:
292 mutex_unlock(&rfkill_mutex); 290 mutex_unlock(&rfkill_mutex);
293 return retval; 291
292 return error;
294} 293}
295 294
296static void rfkill_remove_switch(struct rfkill *rfkill) 295static void rfkill_remove_switch(struct rfkill *rfkill)
@@ -387,20 +386,26 @@ int rfkill_register(struct rfkill *rfkill)
387 386
388 if (!rfkill->toggle_radio) 387 if (!rfkill->toggle_radio)
389 return -EINVAL; 388 return -EINVAL;
390 389 if (rfkill->type >= RFKILL_TYPE_MAX)
391 error = rfkill_add_switch(rfkill); 390 return -EINVAL;
392 if (error)
393 return error;
394 391
395 snprintf(dev->bus_id, sizeof(dev->bus_id), 392 snprintf(dev->bus_id, sizeof(dev->bus_id),
396 "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); 393 "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1);
397 394
395 rfkill_led_trigger_register(rfkill);
396
397 error = rfkill_add_switch(rfkill);
398 if (error) {
399 rfkill_led_trigger_unregister(rfkill);
400 return error;
401 }
402
398 error = device_add(dev); 403 error = device_add(dev);
399 if (error) { 404 if (error) {
405 rfkill_led_trigger_unregister(rfkill);
400 rfkill_remove_switch(rfkill); 406 rfkill_remove_switch(rfkill);
401 return error; 407 return error;
402 } 408 }
403 rfkill_led_trigger_register(rfkill);
404 409
405 return 0; 410 return 0;
406} 411}
@@ -416,9 +421,9 @@ EXPORT_SYMBOL(rfkill_register);
416 */ 421 */
417void rfkill_unregister(struct rfkill *rfkill) 422void rfkill_unregister(struct rfkill *rfkill)
418{ 423{
419 rfkill_led_trigger_unregister(rfkill);
420 device_del(&rfkill->dev); 424 device_del(&rfkill->dev);
421 rfkill_remove_switch(rfkill); 425 rfkill_remove_switch(rfkill);
426 rfkill_led_trigger_unregister(rfkill);
422 put_device(&rfkill->dev); 427 put_device(&rfkill->dev);
423} 428}
424EXPORT_SYMBOL(rfkill_unregister); 429EXPORT_SYMBOL(rfkill_unregister);
@@ -448,5 +453,5 @@ static void __exit rfkill_exit(void)
448 class_unregister(&rfkill_class); 453 class_unregister(&rfkill_class);
449} 454}
450 455
451module_init(rfkill_init); 456subsys_initcall(rfkill_init);
452module_exit(rfkill_exit); 457module_exit(rfkill_exit);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 509defe53ee5..4a31a81059ab 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -116,7 +116,7 @@ int rosecmp(rose_address *addr1, rose_address *addr2)
116 */ 116 */
117int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask) 117int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask)
118{ 118{
119 int i, j; 119 unsigned int i, j;
120 120
121 if (mask > 10) 121 if (mask > 10)
122 return 1; 122 return 1;
@@ -345,10 +345,9 @@ void rose_destroy_socket(struct sock *sk)
345 if (atomic_read(&sk->sk_wmem_alloc) || 345 if (atomic_read(&sk->sk_wmem_alloc) ||
346 atomic_read(&sk->sk_rmem_alloc)) { 346 atomic_read(&sk->sk_rmem_alloc)) {
347 /* Defer: outstanding buffers */ 347 /* Defer: outstanding buffers */
348 init_timer(&sk->sk_timer); 348 setup_timer(&sk->sk_timer, rose_destroy_timer,
349 (unsigned long)sk);
349 sk->sk_timer.expires = jiffies + 10 * HZ; 350 sk->sk_timer.expires = jiffies + 10 * HZ;
350 sk->sk_timer.function = rose_destroy_timer;
351 sk->sk_timer.data = (unsigned long)sk;
352 add_timer(&sk->sk_timer); 351 add_timer(&sk->sk_timer);
353 } else 352 } else
354 sock_put(sk); 353 sock_put(sk);
@@ -513,7 +512,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol)
513 if (sock->type != SOCK_SEQPACKET || protocol != 0) 512 if (sock->type != SOCK_SEQPACKET || protocol != 0)
514 return -ESOCKTNOSUPPORT; 513 return -ESOCKTNOSUPPORT;
515 514
516 if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) 515 sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto);
516 if (sk == NULL)
517 return -ENOMEM; 517 return -ENOMEM;
518 518
519 rose = rose_sk(sk); 519 rose = rose_sk(sk);
@@ -551,7 +551,8 @@ static struct sock *rose_make_new(struct sock *osk)
551 if (osk->sk_type != SOCK_SEQPACKET) 551 if (osk->sk_type != SOCK_SEQPACKET)
552 return NULL; 552 return NULL;
553 553
554 if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) 554 sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto);
555 if (sk == NULL)
555 return NULL; 556 return NULL;
556 557
557 rose = rose_sk(sk); 558 rose = rose_sk(sk);
@@ -972,8 +973,8 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
972 */ 973 */
973 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 974 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
974 975
975 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; 976 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
976 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; 977 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
977 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 978 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
978 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); 979 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
979 return 0; 980 return 0;
@@ -1376,6 +1377,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1376 1377
1377#ifdef CONFIG_PROC_FS 1378#ifdef CONFIG_PROC_FS
1378static void *rose_info_start(struct seq_file *seq, loff_t *pos) 1379static void *rose_info_start(struct seq_file *seq, loff_t *pos)
1380 __acquires(rose_list_lock)
1379{ 1381{
1380 int i; 1382 int i;
1381 struct sock *s; 1383 struct sock *s;
@@ -1403,6 +1405,7 @@ static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos)
1403} 1405}
1404 1406
1405static void rose_info_stop(struct seq_file *seq, void *v) 1407static void rose_info_stop(struct seq_file *seq, void *v)
1408 __releases(rose_list_lock)
1406{ 1409{
1407 spin_unlock_bh(&rose_list_lock); 1410 spin_unlock_bh(&rose_list_lock);
1408} 1411}
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 1b6741f1d746..12cfcf09556b 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -55,13 +55,13 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev,
55 55
56static int rose_rebuild_header(struct sk_buff *skb) 56static int rose_rebuild_header(struct sk_buff *skb)
57{ 57{
58#ifdef CONFIG_INET
58 struct net_device *dev = skb->dev; 59 struct net_device *dev = skb->dev;
59 struct net_device_stats *stats = netdev_priv(dev); 60 struct net_device_stats *stats = netdev_priv(dev);
60 unsigned char *bp = (unsigned char *)skb->data; 61 unsigned char *bp = (unsigned char *)skb->data;
61 struct sk_buff *skbn; 62 struct sk_buff *skbn;
62 unsigned int len; 63 unsigned int len;
63 64
64#ifdef CONFIG_INET
65 if (arp_find(bp + 7, skb)) { 65 if (arp_find(bp + 7, skb)) {
66 return 1; 66 return 1;
67 } 67 }
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 4ee0879d3540..7f7fcb46b4fa 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -182,7 +182,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
182 break; 182 break;
183 } 183 }
184 if (atomic_read(&sk->sk_rmem_alloc) > 184 if (atomic_read(&sk->sk_rmem_alloc) >
185 (sk->sk_rcvbuf / 2)) 185 (sk->sk_rcvbuf >> 1))
186 rose->condition |= ROSE_COND_OWN_RX_BUSY; 186 rose->condition |= ROSE_COND_OWN_RX_BUSY;
187 } 187 }
188 /* 188 /*
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 540c0f26ffee..fb9359fb2358 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -994,8 +994,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
994 goto out; 994 goto out;
995 } 995 }
996 996
997 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; 997 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
998 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; 998 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
999 999
1000 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 1000 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
1001 1001
@@ -1068,6 +1068,7 @@ out:
1068#ifdef CONFIG_PROC_FS 1068#ifdef CONFIG_PROC_FS
1069 1069
1070static void *rose_node_start(struct seq_file *seq, loff_t *pos) 1070static void *rose_node_start(struct seq_file *seq, loff_t *pos)
1071 __acquires(rose_neigh_list_lock)
1071{ 1072{
1072 struct rose_node *rose_node; 1073 struct rose_node *rose_node;
1073 int i = 1; 1074 int i = 1;
@@ -1091,6 +1092,7 @@ static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos)
1091} 1092}
1092 1093
1093static void rose_node_stop(struct seq_file *seq, void *v) 1094static void rose_node_stop(struct seq_file *seq, void *v)
1095 __releases(rose_neigh_list_lock)
1094{ 1096{
1095 spin_unlock_bh(&rose_neigh_list_lock); 1097 spin_unlock_bh(&rose_neigh_list_lock);
1096} 1098}
@@ -1144,6 +1146,7 @@ const struct file_operations rose_nodes_fops = {
1144}; 1146};
1145 1147
1146static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) 1148static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
1149 __acquires(rose_neigh_list_lock)
1147{ 1150{
1148 struct rose_neigh *rose_neigh; 1151 struct rose_neigh *rose_neigh;
1149 int i = 1; 1152 int i = 1;
@@ -1167,6 +1170,7 @@ static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
1167} 1170}
1168 1171
1169static void rose_neigh_stop(struct seq_file *seq, void *v) 1172static void rose_neigh_stop(struct seq_file *seq, void *v)
1173 __releases(rose_neigh_list_lock)
1170{ 1174{
1171 spin_unlock_bh(&rose_neigh_list_lock); 1175 spin_unlock_bh(&rose_neigh_list_lock);
1172} 1176}
@@ -1227,6 +1231,7 @@ const struct file_operations rose_neigh_fops = {
1227 1231
1228 1232
1229static void *rose_route_start(struct seq_file *seq, loff_t *pos) 1233static void *rose_route_start(struct seq_file *seq, loff_t *pos)
1234 __acquires(rose_route_list_lock)
1230{ 1235{
1231 struct rose_route *rose_route; 1236 struct rose_route *rose_route;
1232 int i = 1; 1237 int i = 1;
@@ -1250,6 +1255,7 @@ static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos)
1250} 1255}
1251 1256
1252static void rose_route_stop(struct seq_file *seq, void *v) 1257static void rose_route_stop(struct seq_file *seq, void *v)
1258 __releases(rose_route_list_lock)
1253{ 1259{
1254 spin_unlock_bh(&rose_route_list_lock); 1260 spin_unlock_bh(&rose_route_list_lock);
1255} 1261}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index 455b0555a669..20be3485a97f 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -138,29 +138,15 @@ static ctl_table rose_table[] = {
138 { .ctl_name = 0 } 138 { .ctl_name = 0 }
139}; 139};
140 140
141static ctl_table rose_dir_table[] = { 141static struct ctl_path rose_path[] = {
142 { 142 { .procname = "net", .ctl_name = CTL_NET, },
143 .ctl_name = NET_ROSE, 143 { .procname = "rose", .ctl_name = NET_ROSE, },
144 .procname = "rose", 144 { }
145 .mode = 0555,
146 .child = rose_table
147 },
148 { .ctl_name = 0 }
149};
150
151static ctl_table rose_root_table[] = {
152 {
153 .ctl_name = CTL_NET,
154 .procname = "net",
155 .mode = 0555,
156 .child = rose_dir_table
157 },
158 { .ctl_name = 0 }
159}; 145};
160 146
161void __init rose_register_sysctl(void) 147void __init rose_register_sysctl(void)
162{ 148{
163 rose_table_header = register_sysctl_table(rose_root_table); 149 rose_table_header = register_sysctl_paths(rose_path, rose_table);
164} 150}
165 151
166void rose_unregister_sysctl(void) 152void rose_unregister_sysctl(void)
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index e662f1d07664..0d3103c4f11c 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -5,6 +5,7 @@
5config AF_RXRPC 5config AF_RXRPC
6 tristate "RxRPC session sockets" 6 tristate "RxRPC session sockets"
7 depends on INET && EXPERIMENTAL 7 depends on INET && EXPERIMENTAL
8 select CRYPTO
8 select KEYS 9 select KEYS
9 help 10 help
10 Say Y or M here to include support for RxRPC session sockets (just 11 Say Y or M here to include support for RxRPC session sockets (just
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c680017f5c8e..5e82f1c0afbb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
65 if (rxrpc_writable(sk)) { 65 if (rxrpc_writable(sk)) {
66 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 66 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
67 wake_up_interruptible(sk->sk_sleep); 67 wake_up_interruptible(sk->sk_sleep);
68 sk_wake_async(sk, 2, POLL_OUT); 68 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
69 } 69 }
70 read_unlock(&sk->sk_callback_lock); 70 read_unlock(&sk->sk_callback_lock);
71} 71}
@@ -627,7 +627,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol)
627 sock->ops = &rxrpc_rpc_ops; 627 sock->ops = &rxrpc_rpc_ops;
628 sock->state = SS_UNCONNECTED; 628 sock->state = SS_UNCONNECTED;
629 629
630 sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); 630 sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto);
631 if (!sk) 631 if (!sk)
632 return -ENOMEM; 632 return -ENOMEM;
633 633
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index 3c04b00dab74..d9231245a79a 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -15,7 +15,7 @@
15#include <net/af_rxrpc.h> 15#include <net/af_rxrpc.h>
16#include "ar-internal.h" 16#include "ar-internal.h"
17 17
18const char *rxrpc_call_states[] = { 18const char *const rxrpc_call_states[] = {
19 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", 19 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
20 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", 20 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
21 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", 21 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index d6667f7bc85e..3869a5866752 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -651,7 +651,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
651 651
652 candidate->trans = trans; 652 candidate->trans = trans;
653 candidate->epoch = hdr->epoch; 653 candidate->epoch = hdr->epoch;
654 candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK); 654 candidate->cid = hdr->cid & cpu_to_be32(RXRPC_CIDMASK);
655 candidate->service_id = hdr->serviceId; 655 candidate->service_id = hdr->serviceId;
656 candidate->security_ix = hdr->securityIndex; 656 candidate->security_ix = hdr->securityIndex;
657 candidate->in_clientflag = RXRPC_CLIENT_INITIATED; 657 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 91b5bbb003e2..f8a699e92962 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -20,6 +20,7 @@
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/af_rxrpc.h> 21#include <net/af_rxrpc.h>
22#include <net/ip.h> 22#include <net/ip.h>
23#include <net/udp.h>
23#include "ar-internal.h" 24#include "ar-internal.h"
24 25
25unsigned long rxrpc_ack_timeout = 1; 26unsigned long rxrpc_ack_timeout = 1;
@@ -594,7 +595,7 @@ dead_call:
594 read_unlock_bh(&conn->lock); 595 read_unlock_bh(&conn->lock);
595 596
596 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 597 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
597 sp->hdr.seq == __constant_cpu_to_be32(1)) { 598 sp->hdr.seq == cpu_to_be32(1)) {
598 _debug("incoming call"); 599 _debug("incoming call");
599 skb_queue_tail(&conn->trans->local->accept_queue, skb); 600 skb_queue_tail(&conn->trans->local->accept_queue, skb);
600 rxrpc_queue_work(&conn->trans->local->acceptor); 601 rxrpc_queue_work(&conn->trans->local->acceptor);
@@ -707,10 +708,13 @@ void rxrpc_data_ready(struct sock *sk, int count)
707 if (skb_checksum_complete(skb)) { 708 if (skb_checksum_complete(skb)) {
708 rxrpc_free_skb(skb); 709 rxrpc_free_skb(skb);
709 rxrpc_put_local(local); 710 rxrpc_put_local(local);
711 UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0);
710 _leave(" [CSUM failed]"); 712 _leave(" [CSUM failed]");
711 return; 713 return;
712 } 714 }
713 715
716 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0);
717
714 /* the socket buffer we have is owned by UDP, with UDP's data all over 718 /* the socket buffer we have is owned by UDP, with UDP's data all over
715 * it, but we really want our own */ 719 * it, but we really want our own */
716 skb_orphan(skb); 720 skb_orphan(skb);
@@ -770,7 +774,7 @@ cant_route_call:
770 _debug("can't route call"); 774 _debug("can't route call");
771 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 775 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
772 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { 776 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
773 if (sp->hdr.seq == __constant_cpu_to_be32(1)) { 777 if (sp->hdr.seq == cpu_to_be32(1)) {
774 _debug("first packet"); 778 _debug("first packet");
775 skb_queue_tail(&local->accept_queue, skb); 779 skb_queue_tail(&local->accept_queue, skb);
776 rxrpc_queue_work(&local->acceptor); 780 rxrpc_queue_work(&local->acceptor);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 58aaf892238e..1aaa2e804b0d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -565,9 +565,9 @@ extern void __exit rxrpc_destroy_all_peers(void);
565/* 565/*
566 * ar-proc.c 566 * ar-proc.c
567 */ 567 */
568extern const char *rxrpc_call_states[]; 568extern const char *const rxrpc_call_states[];
569extern struct file_operations rxrpc_call_seq_fops; 569extern const struct file_operations rxrpc_call_seq_fops;
570extern struct file_operations rxrpc_connection_seq_fops; 570extern const struct file_operations rxrpc_connection_seq_fops;
571 571
572/* 572/*
573 * ar-recvmsg.c 573 * ar-recvmsg.c
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index fe03f71f17da..f3a2bd747a8f 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -114,7 +114,7 @@ static int rxrpc_create_local(struct rxrpc_local *local)
114 return 0; 114 return 0;
115 115
116error: 116error:
117 local->socket->ops->shutdown(local->socket, 2); 117 kernel_sock_shutdown(local->socket, SHUT_RDWR);
118 local->socket->sk->sk_user_data = NULL; 118 local->socket->sk->sk_user_data = NULL;
119 sock_release(local->socket); 119 sock_release(local->socket);
120 local->socket = NULL; 120 local->socket = NULL;
@@ -267,7 +267,7 @@ static void rxrpc_destroy_local(struct work_struct *work)
267 /* finish cleaning up the local descriptor */ 267 /* finish cleaning up the local descriptor */
268 rxrpc_purge_queue(&local->accept_queue); 268 rxrpc_purge_queue(&local->accept_queue);
269 rxrpc_purge_queue(&local->reject_queue); 269 rxrpc_purge_queue(&local->reject_queue);
270 local->socket->ops->shutdown(local->socket, 2); 270 kernel_sock_shutdown(local->socket, SHUT_RDWR);
271 sock_release(local->socket); 271 sock_release(local->socket);
272 272
273 up_read(&rxrpc_local_sem); 273 up_read(&rxrpc_local_sem);
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 90fa107a8af9..2abe2081a5e8 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -57,7 +57,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
57 BUG(); 57 BUG();
58 } 58 }
59 59
60 ret = ip_route_output_key(&rt, &fl); 60 ret = ip_route_output_key(&init_net, &rt, &fl);
61 if (ret < 0) { 61 if (ret < 0) {
62 _leave(" [route err %d]", ret); 62 _leave(" [route err %d]", ret);
63 return; 63 return;
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
index 2e83ce325d15..83eda247fe48 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/ar-proc.c
@@ -14,7 +14,7 @@
14#include <net/af_rxrpc.h> 14#include <net/af_rxrpc.h>
15#include "ar-internal.h" 15#include "ar-internal.h"
16 16
17static const char *rxrpc_conn_states[] = { 17static const char *const rxrpc_conn_states[] = {
18 [RXRPC_CONN_UNUSED] = "Unused ", 18 [RXRPC_CONN_UNUSED] = "Unused ",
19 [RXRPC_CONN_CLIENT] = "Client ", 19 [RXRPC_CONN_CLIENT] = "Client ",
20 [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ", 20 [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
@@ -98,7 +98,7 @@ static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
98 return seq_open(file, &rxrpc_call_seq_ops); 98 return seq_open(file, &rxrpc_call_seq_ops);
99} 99}
100 100
101struct file_operations rxrpc_call_seq_fops = { 101const struct file_operations rxrpc_call_seq_fops = {
102 .owner = THIS_MODULE, 102 .owner = THIS_MODULE,
103 .open = rxrpc_call_seq_open, 103 .open = rxrpc_call_seq_open,
104 .read = seq_read, 104 .read = seq_read,
@@ -183,7 +183,7 @@ static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
183 return seq_open(file, &rxrpc_connection_seq_ops); 183 return seq_open(file, &rxrpc_connection_seq_ops);
184} 184}
185 185
186struct file_operations rxrpc_connection_seq_fops = { 186const struct file_operations rxrpc_connection_seq_fops = {
187 .owner = THIS_MODULE, 187 .owner = THIS_MODULE,
188 .open = rxrpc_connection_seq_open, 188 .open = rxrpc_connection_seq_open,
189 .read = seq_read, 189 .read = seq_read,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index eebefb6ef139..f48434adb7c2 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -237,7 +237,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
237 len = data_size + call->conn->size_align - 1; 237 len = data_size + call->conn->size_align - 1;
238 len &= ~(call->conn->size_align - 1); 238 len &= ~(call->conn->size_align - 1);
239 239
240 sg_init_table(sg, skb_to_sgvec(skb, sg, 0, len)); 240 sg_init_table(sg, nsg);
241 skb_to_sgvec(skb, sg, 0, len);
241 crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); 242 crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
242 243
243 _leave(" = 0"); 244 _leave(" = 0");
@@ -283,7 +284,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
283 284
284 /* calculate the security checksum */ 285 /* calculate the security checksum */
285 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 286 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
286 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff); 287 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
287 tmpbuf.x[0] = sp->hdr.callNumber; 288 tmpbuf.x[0] = sp->hdr.callNumber;
288 tmpbuf.x[1] = x; 289 tmpbuf.x[1] = x;
289 290
@@ -344,7 +345,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
344 goto nomem; 345 goto nomem;
345 346
346 sg_init_table(sg, nsg); 347 sg_init_table(sg, nsg);
347 sg_mark_end(sg, skb_to_sgvec(skb, sg, 0, 8)); 348 skb_to_sgvec(skb, sg, 0, 8);
348 349
349 /* start the decryption afresh */ 350 /* start the decryption afresh */
350 memset(&iv, 0, sizeof(iv)); 351 memset(&iv, 0, sizeof(iv));
@@ -426,7 +427,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
426 } 427 }
427 428
428 sg_init_table(sg, nsg); 429 sg_init_table(sg, nsg);
429 sg_mark_end(sg, skb_to_sgvec(skb, sg, 0, skb->len)); 430 skb_to_sgvec(skb, sg, 0, skb->len);
430 431
431 /* decrypt from the session key */ 432 /* decrypt from the session key */
432 payload = call->conn->key->payload.data; 433 payload = call->conn->key->payload.data;
@@ -517,7 +518,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
517 518
518 /* validate the security checksum */ 519 /* validate the security checksum */
519 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 520 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
520 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff); 521 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
521 tmpbuf.x[0] = call->call_id; 522 tmpbuf.x[0] = call->call_id;
522 tmpbuf.x[1] = x; 523 tmpbuf.x[1] = x;
523 524
@@ -701,7 +702,7 @@ static void rxkad_sg_set_buf2(struct scatterlist sg[2],
701 nsg++; 702 nsg++;
702 } 703 }
703 704
704 sg_mark_end(sg, nsg); 705 sg_mark_end(&sg[nsg - 1]);
705 706
706 ASSERTCMP(sg[0].length + sg[1].length, ==, buflen); 707 ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
707} 708}
@@ -1020,6 +1021,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1020 1021
1021 abort_code = RXKADINCONSISTENCY; 1022 abort_code = RXKADINCONSISTENCY;
1022 if (version != RXKAD_VERSION) 1023 if (version != RXKAD_VERSION)
1024 goto protocol_error;
1023 1025
1024 abort_code = RXKADTICKETLEN; 1026 abort_code = RXKADTICKETLEN;
1025 if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) 1027 if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c15c4888d12..82adfe6447d7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -198,6 +198,7 @@ config NET_SCH_NETEM
198 198
199config NET_SCH_INGRESS 199config NET_SCH_INGRESS
200 tristate "Ingress Qdisc" 200 tristate "Ingress Qdisc"
201 depends on NET_CLS_ACT
201 ---help--- 202 ---help---
202 Say Y here if you want to use classifiers for incoming packets. 203 Say Y here if you want to use classifiers for incoming packets.
203 If unsure, say Y. 204 If unsure, say Y.
@@ -306,6 +307,17 @@ config NET_CLS_RSVP6
306 To compile this code as a module, choose M here: the 307 To compile this code as a module, choose M here: the
307 module will be called cls_rsvp6. 308 module will be called cls_rsvp6.
308 309
310config NET_CLS_FLOW
311 tristate "Flow classifier"
312 select NET_CLS
313 ---help---
314 If you say Y here, you will be able to classify packets based on
315 a configurable combination of packet keys. This is mostly useful
316 in combination with SFQ.
317
318 To compile this code as a module, choose M here: the
319 module will be called cls_flow.
320
309config NET_EMATCH 321config NET_EMATCH
310 bool "Extended Matches" 322 bool "Extended Matches"
311 select NET_CLS 323 select NET_CLS
@@ -445,7 +457,6 @@ config NET_ACT_IPT
445config NET_ACT_NAT 457config NET_ACT_NAT
446 tristate "Stateless NAT" 458 tristate "Stateless NAT"
447 depends on NET_CLS_ACT 459 depends on NET_CLS_ACT
448 select NETFILTER
449 ---help--- 460 ---help---
450 Say Y here to do stateless NAT on IPv4 packets. You should use 461 Say Y here to do stateless NAT on IPv4 packets. You should use
451 netfilter for NAT unless you know what you are doing. 462 netfilter for NAT unless you know what you are doing.
@@ -476,15 +487,6 @@ config NET_ACT_SIMP
476 To compile this code as a module, choose M here: the 487 To compile this code as a module, choose M here: the
477 module will be called simple. 488 module will be called simple.
478 489
479config NET_CLS_POLICE
480 bool "Traffic Policing (obsolete)"
481 select NET_CLS_ACT
482 select NET_ACT_POLICE
483 ---help---
484 Say Y here if you want to do traffic policing, i.e. strict
485 bandwidth limiting. This option is obsolete and just selects
486 the option replacing it. It will be removed in the future.
487
488config NET_CLS_IND 490config NET_CLS_IND
489 bool "Incoming device classification" 491 bool "Incoming device classification"
490 depends on NET_CLS_U32 || NET_CLS_FW 492 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 81ecbe8e7dce..1d2b0f7df848 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
35obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o 35obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
36obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o 36obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
37obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o 37obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
38obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
38obj-$(CONFIG_NET_EMATCH) += ematch.o 39obj-$(CONFIG_NET_EMATCH) += ematch.o
39obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o 40obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
40obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o 41obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72cdb0fade20..0b8eb235bc13 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -18,6 +18,9 @@
18#include <linux/skbuff.h> 18#include <linux/skbuff.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/kmod.h> 20#include <linux/kmod.h>
21#include <linux/err.h>
22#include <net/net_namespace.h>
23#include <net/sock.h>
21#include <net/sch_generic.h> 24#include <net/sch_generic.h>
22#include <net/act_api.h> 25#include <net/act_api.h>
23#include <net/netlink.h> 26#include <net/netlink.h>
@@ -66,7 +69,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
66{ 69{
67 struct tcf_common *p; 70 struct tcf_common *p;
68 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 71 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
69 struct rtattr *r ; 72 struct nlattr *nest;
70 73
71 read_lock_bh(hinfo->lock); 74 read_lock_bh(hinfo->lock);
72 75
@@ -81,15 +84,17 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
81 continue; 84 continue;
82 a->priv = p; 85 a->priv = p;
83 a->order = n_i; 86 a->order = n_i;
84 r = (struct rtattr *)skb_tail_pointer(skb); 87
85 RTA_PUT(skb, a->order, 0, NULL); 88 nest = nla_nest_start(skb, a->order);
89 if (nest == NULL)
90 goto nla_put_failure;
86 err = tcf_action_dump_1(skb, a, 0, 0); 91 err = tcf_action_dump_1(skb, a, 0, 0);
87 if (err < 0) { 92 if (err < 0) {
88 index--; 93 index--;
89 nlmsg_trim(skb, r); 94 nlmsg_trim(skb, nest);
90 goto done; 95 goto done;
91 } 96 }
92 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 97 nla_nest_end(skb, nest);
93 n_i++; 98 n_i++;
94 if (n_i >= TCA_ACT_MAX_PRIO) 99 if (n_i >= TCA_ACT_MAX_PRIO)
95 goto done; 100 goto done;
@@ -101,8 +106,8 @@ done:
101 cb->args[0] += n_i; 106 cb->args[0] += n_i;
102 return n_i; 107 return n_i;
103 108
104rtattr_failure: 109nla_put_failure:
105 nlmsg_trim(skb, r); 110 nla_nest_cancel(skb, nest);
106 goto done; 111 goto done;
107} 112}
108 113
@@ -110,12 +115,13 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
110 struct tcf_hashinfo *hinfo) 115 struct tcf_hashinfo *hinfo)
111{ 116{
112 struct tcf_common *p, *s_p; 117 struct tcf_common *p, *s_p;
113 struct rtattr *r ; 118 struct nlattr *nest;
114 int i= 0, n_i = 0; 119 int i= 0, n_i = 0;
115 120
116 r = (struct rtattr *)skb_tail_pointer(skb); 121 nest = nla_nest_start(skb, a->order);
117 RTA_PUT(skb, a->order, 0, NULL); 122 if (nest == NULL)
118 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 123 goto nla_put_failure;
124 NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
119 for (i = 0; i < (hinfo->hmask + 1); i++) { 125 for (i = 0; i < (hinfo->hmask + 1); i++) {
120 p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; 126 p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
121 127
@@ -127,12 +133,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
127 p = s_p; 133 p = s_p;
128 } 134 }
129 } 135 }
130 RTA_PUT(skb, TCA_FCNT, 4, &n_i); 136 NLA_PUT_U32(skb, TCA_FCNT, n_i);
131 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 137 nla_nest_end(skb, nest);
132 138
133 return n_i; 139 return n_i;
134rtattr_failure: 140nla_put_failure:
135 nlmsg_trim(skb, r); 141 nla_nest_cancel(skb, nest);
136 return -EINVAL; 142 return -EINVAL;
137} 143}
138 144
@@ -209,7 +215,7 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
209} 215}
210EXPORT_SYMBOL(tcf_hash_check); 216EXPORT_SYMBOL(tcf_hash_check);
211 217
212struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) 218struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
213{ 219{
214 struct tcf_common *p = kzalloc(size, GFP_KERNEL); 220 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
215 221
@@ -261,6 +267,7 @@ int tcf_register_action(struct tc_action_ops *act)
261 write_unlock(&act_mod_lock); 267 write_unlock(&act_mod_lock);
262 return 0; 268 return 0;
263} 269}
270EXPORT_SYMBOL(tcf_register_action);
264 271
265int tcf_unregister_action(struct tc_action_ops *act) 272int tcf_unregister_action(struct tc_action_ops *act)
266{ 273{
@@ -279,6 +286,7 @@ int tcf_unregister_action(struct tc_action_ops *act)
279 write_unlock(&act_mod_lock); 286 write_unlock(&act_mod_lock);
280 return err; 287 return err;
281} 288}
289EXPORT_SYMBOL(tcf_unregister_action);
282 290
283/* lookup by name */ 291/* lookup by name */
284static struct tc_action_ops *tc_lookup_action_n(char *kind) 292static struct tc_action_ops *tc_lookup_action_n(char *kind)
@@ -301,15 +309,15 @@ static struct tc_action_ops *tc_lookup_action_n(char *kind)
301 return a; 309 return a;
302} 310}
303 311
304/* lookup by rtattr */ 312/* lookup by nlattr */
305static struct tc_action_ops *tc_lookup_action(struct rtattr *kind) 313static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
306{ 314{
307 struct tc_action_ops *a = NULL; 315 struct tc_action_ops *a = NULL;
308 316
309 if (kind) { 317 if (kind) {
310 read_lock(&act_mod_lock); 318 read_lock(&act_mod_lock);
311 for (a = act_base; a; a = a->next) { 319 for (a = act_base; a; a = a->next) {
312 if (rtattr_strcmp(kind, a->kind) == 0) { 320 if (nla_strcmp(kind, a->kind) == 0) {
313 if (!try_module_get(a->owner)) { 321 if (!try_module_get(a->owner)) {
314 read_unlock(&act_mod_lock); 322 read_unlock(&act_mod_lock);
315 return NULL; 323 return NULL;
@@ -375,6 +383,7 @@ repeat:
375exec_done: 383exec_done:
376 return ret; 384 return ret;
377} 385}
386EXPORT_SYMBOL(tcf_action_exec);
378 387
379void tcf_action_destroy(struct tc_action *act, int bind) 388void tcf_action_destroy(struct tc_action *act, int bind)
380{ 389{
@@ -409,73 +418,77 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
409{ 418{
410 int err = -EINVAL; 419 int err = -EINVAL;
411 unsigned char *b = skb_tail_pointer(skb); 420 unsigned char *b = skb_tail_pointer(skb);
412 struct rtattr *r; 421 struct nlattr *nest;
413 422
414 if (a->ops == NULL || a->ops->dump == NULL) 423 if (a->ops == NULL || a->ops->dump == NULL)
415 return err; 424 return err;
416 425
417 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 426 NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
418 if (tcf_action_copy_stats(skb, a, 0)) 427 if (tcf_action_copy_stats(skb, a, 0))
419 goto rtattr_failure; 428 goto nla_put_failure;
420 r = (struct rtattr *)skb_tail_pointer(skb); 429 nest = nla_nest_start(skb, TCA_OPTIONS);
421 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 430 if (nest == NULL)
431 goto nla_put_failure;
422 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 432 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
423 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 433 nla_nest_end(skb, nest);
424 return err; 434 return err;
425 } 435 }
426 436
427rtattr_failure: 437nla_put_failure:
428 nlmsg_trim(skb, b); 438 nlmsg_trim(skb, b);
429 return -1; 439 return -1;
430} 440}
441EXPORT_SYMBOL(tcf_action_dump_1);
431 442
432int 443int
433tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) 444tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
434{ 445{
435 struct tc_action *a; 446 struct tc_action *a;
436 int err = -EINVAL; 447 int err = -EINVAL;
437 unsigned char *b = skb_tail_pointer(skb); 448 struct nlattr *nest;
438 struct rtattr *r ;
439 449
440 while ((a = act) != NULL) { 450 while ((a = act) != NULL) {
441 r = (struct rtattr *)skb_tail_pointer(skb);
442 act = a->next; 451 act = a->next;
443 RTA_PUT(skb, a->order, 0, NULL); 452 nest = nla_nest_start(skb, a->order);
453 if (nest == NULL)
454 goto nla_put_failure;
444 err = tcf_action_dump_1(skb, a, bind, ref); 455 err = tcf_action_dump_1(skb, a, bind, ref);
445 if (err < 0) 456 if (err < 0)
446 goto errout; 457 goto errout;
447 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 458 nla_nest_end(skb, nest);
448 } 459 }
449 460
450 return 0; 461 return 0;
451 462
452rtattr_failure: 463nla_put_failure:
453 err = -EINVAL; 464 err = -EINVAL;
454errout: 465errout:
455 nlmsg_trim(skb, b); 466 nla_nest_cancel(skb, nest);
456 return err; 467 return err;
457} 468}
458 469
459struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, 470struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
460 char *name, int ovr, int bind, int *err) 471 char *name, int ovr, int bind)
461{ 472{
462 struct tc_action *a; 473 struct tc_action *a;
463 struct tc_action_ops *a_o; 474 struct tc_action_ops *a_o;
464 char act_name[IFNAMSIZ]; 475 char act_name[IFNAMSIZ];
465 struct rtattr *tb[TCA_ACT_MAX+1]; 476 struct nlattr *tb[TCA_ACT_MAX+1];
466 struct rtattr *kind; 477 struct nlattr *kind;
467 478 int err;
468 *err = -EINVAL;
469 479
470 if (name == NULL) { 480 if (name == NULL) {
471 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 481 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
482 if (err < 0)
472 goto err_out; 483 goto err_out;
473 kind = tb[TCA_ACT_KIND-1]; 484 err = -EINVAL;
485 kind = tb[TCA_ACT_KIND];
474 if (kind == NULL) 486 if (kind == NULL)
475 goto err_out; 487 goto err_out;
476 if (rtattr_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) 488 if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
477 goto err_out; 489 goto err_out;
478 } else { 490 } else {
491 err = -EINVAL;
479 if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) 492 if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
480 goto err_out; 493 goto err_out;
481 } 494 }
@@ -496,36 +509,35 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
496 * indicate this using -EAGAIN. 509 * indicate this using -EAGAIN.
497 */ 510 */
498 if (a_o != NULL) { 511 if (a_o != NULL) {
499 *err = -EAGAIN; 512 err = -EAGAIN;
500 goto err_mod; 513 goto err_mod;
501 } 514 }
502#endif 515#endif
503 *err = -ENOENT; 516 err = -ENOENT;
504 goto err_out; 517 goto err_out;
505 } 518 }
506 519
507 *err = -ENOMEM; 520 err = -ENOMEM;
508 a = kzalloc(sizeof(*a), GFP_KERNEL); 521 a = kzalloc(sizeof(*a), GFP_KERNEL);
509 if (a == NULL) 522 if (a == NULL)
510 goto err_mod; 523 goto err_mod;
511 524
512 /* backward compatibility for policer */ 525 /* backward compatibility for policer */
513 if (name == NULL) 526 if (name == NULL)
514 *err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind); 527 err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
515 else 528 else
516 *err = a_o->init(rta, est, a, ovr, bind); 529 err = a_o->init(nla, est, a, ovr, bind);
517 if (*err < 0) 530 if (err < 0)
518 goto err_free; 531 goto err_free;
519 532
520 /* module count goes up only when brand new policy is created 533 /* module count goes up only when brand new policy is created
521 if it exists and is only bound to in a_o->init() then 534 if it exists and is only bound to in a_o->init() then
522 ACT_P_CREATED is not returned (a zero is). 535 ACT_P_CREATED is not returned (a zero is).
523 */ 536 */
524 if (*err != ACT_P_CREATED) 537 if (err != ACT_P_CREATED)
525 module_put(a_o->owner); 538 module_put(a_o->owner);
526 a->ops = a_o; 539 a->ops = a_o;
527 540
528 *err = 0;
529 return a; 541 return a;
530 542
531err_free: 543err_free:
@@ -533,26 +545,26 @@ err_free:
533err_mod: 545err_mod:
534 module_put(a_o->owner); 546 module_put(a_o->owner);
535err_out: 547err_out:
536 return NULL; 548 return ERR_PTR(err);
537} 549}
538 550
539struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est, 551struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
540 char *name, int ovr, int bind, int *err) 552 char *name, int ovr, int bind)
541{ 553{
542 struct rtattr *tb[TCA_ACT_MAX_PRIO+1]; 554 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
543 struct tc_action *head = NULL, *act, *act_prev = NULL; 555 struct tc_action *head = NULL, *act, *act_prev = NULL;
556 int err;
544 int i; 557 int i;
545 558
546 if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) { 559 err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
547 *err = -EINVAL; 560 if (err < 0)
548 return head; 561 return ERR_PTR(err);
549 }
550 562
551 for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) { 563 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
552 act = tcf_action_init_1(tb[i], est, name, ovr, bind, err); 564 act = tcf_action_init_1(tb[i], est, name, ovr, bind);
553 if (act == NULL) 565 if (IS_ERR(act))
554 goto err; 566 goto err;
555 act->order = i+1; 567 act->order = i;
556 568
557 if (head == NULL) 569 if (head == NULL)
558 head = act; 570 head = act;
@@ -565,7 +577,7 @@ struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est,
565err: 577err:
566 if (head != NULL) 578 if (head != NULL)
567 tcf_action_destroy(head, bind); 579 tcf_action_destroy(head, bind);
568 return NULL; 580 return act;
569} 581}
570 582
571int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, 583int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
@@ -619,7 +631,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
619 struct tcamsg *t; 631 struct tcamsg *t;
620 struct nlmsghdr *nlh; 632 struct nlmsghdr *nlh;
621 unsigned char *b = skb_tail_pointer(skb); 633 unsigned char *b = skb_tail_pointer(skb);
622 struct rtattr *x; 634 struct nlattr *nest;
623 635
624 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); 636 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
625 637
@@ -628,18 +640,19 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
628 t->tca__pad1 = 0; 640 t->tca__pad1 = 0;
629 t->tca__pad2 = 0; 641 t->tca__pad2 = 0;
630 642
631 x = (struct rtattr *)skb_tail_pointer(skb); 643 nest = nla_nest_start(skb, TCA_ACT_TAB);
632 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 644 if (nest == NULL)
645 goto nla_put_failure;
633 646
634 if (tcf_action_dump(skb, a, bind, ref) < 0) 647 if (tcf_action_dump(skb, a, bind, ref) < 0)
635 goto rtattr_failure; 648 goto nla_put_failure;
636 649
637 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 650 nla_nest_end(skb, nest);
638 651
639 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 652 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
640 return skb->len; 653 return skb->len;
641 654
642rtattr_failure: 655nla_put_failure:
643nlmsg_failure: 656nlmsg_failure:
644 nlmsg_trim(skb, b); 657 nlmsg_trim(skb, b);
645 return -1; 658 return -1;
@@ -658,48 +671,51 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
658 return -EINVAL; 671 return -EINVAL;
659 } 672 }
660 673
661 return rtnl_unicast(skb, pid); 674 return rtnl_unicast(skb, &init_net, pid);
662} 675}
663 676
664static struct tc_action * 677static struct tc_action *
665tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err) 678tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
666{ 679{
667 struct rtattr *tb[TCA_ACT_MAX+1]; 680 struct nlattr *tb[TCA_ACT_MAX+1];
668 struct tc_action *a; 681 struct tc_action *a;
669 int index; 682 int index;
683 int err;
670 684
671 *err = -EINVAL; 685 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
672 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 686 if (err < 0)
673 return NULL; 687 goto err_out;
674 688
675 if (tb[TCA_ACT_INDEX - 1] == NULL || 689 err = -EINVAL;
676 RTA_PAYLOAD(tb[TCA_ACT_INDEX - 1]) < sizeof(index)) 690 if (tb[TCA_ACT_INDEX] == NULL ||
677 return NULL; 691 nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
678 index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]); 692 goto err_out;
693 index = nla_get_u32(tb[TCA_ACT_INDEX]);
679 694
680 *err = -ENOMEM; 695 err = -ENOMEM;
681 a = kzalloc(sizeof(struct tc_action), GFP_KERNEL); 696 a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
682 if (a == NULL) 697 if (a == NULL)
683 return NULL; 698 goto err_out;
684 699
685 *err = -EINVAL; 700 err = -EINVAL;
686 a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]); 701 a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
687 if (a->ops == NULL) 702 if (a->ops == NULL)
688 goto err_free; 703 goto err_free;
689 if (a->ops->lookup == NULL) 704 if (a->ops->lookup == NULL)
690 goto err_mod; 705 goto err_mod;
691 *err = -ENOENT; 706 err = -ENOENT;
692 if (a->ops->lookup(a, index) == 0) 707 if (a->ops->lookup(a, index) == 0)
693 goto err_mod; 708 goto err_mod;
694 709
695 module_put(a->ops->owner); 710 module_put(a->ops->owner);
696 *err = 0;
697 return a; 711 return a;
712
698err_mod: 713err_mod:
699 module_put(a->ops->owner); 714 module_put(a->ops->owner);
700err_free: 715err_free:
701 kfree(a); 716 kfree(a);
702 return NULL; 717err_out:
718 return ERR_PTR(err);
703} 719}
704 720
705static void cleanup_a(struct tc_action *act) 721static void cleanup_a(struct tc_action *act)
@@ -725,16 +741,16 @@ static struct tc_action *create_a(int i)
725 return act; 741 return act;
726} 742}
727 743
728static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) 744static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
729{ 745{
730 struct sk_buff *skb; 746 struct sk_buff *skb;
731 unsigned char *b; 747 unsigned char *b;
732 struct nlmsghdr *nlh; 748 struct nlmsghdr *nlh;
733 struct tcamsg *t; 749 struct tcamsg *t;
734 struct netlink_callback dcb; 750 struct netlink_callback dcb;
735 struct rtattr *x; 751 struct nlattr *nest;
736 struct rtattr *tb[TCA_ACT_MAX+1]; 752 struct nlattr *tb[TCA_ACT_MAX+1];
737 struct rtattr *kind; 753 struct nlattr *kind;
738 struct tc_action *a = create_a(0); 754 struct tc_action *a = create_a(0);
739 int err = -EINVAL; 755 int err = -EINVAL;
740 756
@@ -752,10 +768,12 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
752 768
753 b = skb_tail_pointer(skb); 769 b = skb_tail_pointer(skb);
754 770
755 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 771 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
772 if (err < 0)
756 goto err_out; 773 goto err_out;
757 774
758 kind = tb[TCA_ACT_KIND-1]; 775 err = -EINVAL;
776 kind = tb[TCA_ACT_KIND];
759 a->ops = tc_lookup_action(kind); 777 a->ops = tc_lookup_action(kind);
760 if (a->ops == NULL) 778 if (a->ops == NULL)
761 goto err_out; 779 goto err_out;
@@ -766,26 +784,27 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
766 t->tca__pad1 = 0; 784 t->tca__pad1 = 0;
767 t->tca__pad2 = 0; 785 t->tca__pad2 = 0;
768 786
769 x = (struct rtattr *)skb_tail_pointer(skb); 787 nest = nla_nest_start(skb, TCA_ACT_TAB);
770 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 788 if (nest == NULL)
789 goto nla_put_failure;
771 790
772 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); 791 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
773 if (err < 0) 792 if (err < 0)
774 goto rtattr_failure; 793 goto nla_put_failure;
775 794
776 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 795 nla_nest_end(skb, nest);
777 796
778 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 797 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
779 nlh->nlmsg_flags |= NLM_F_ROOT; 798 nlh->nlmsg_flags |= NLM_F_ROOT;
780 module_put(a->ops->owner); 799 module_put(a->ops->owner);
781 kfree(a); 800 kfree(a);
782 err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 801 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
783 if (err > 0) 802 if (err > 0)
784 return 0; 803 return 0;
785 804
786 return err; 805 return err;
787 806
788rtattr_failure: 807nla_put_failure:
789nlmsg_failure: 808nlmsg_failure:
790 module_put(a->ops->owner); 809 module_put(a->ops->owner);
791err_out: 810err_out:
@@ -795,25 +814,28 @@ err_out:
795} 814}
796 815
797static int 816static int
798tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) 817tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
799{ 818{
800 int i, ret = 0; 819 int i, ret;
801 struct rtattr *tb[TCA_ACT_MAX_PRIO+1]; 820 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
802 struct tc_action *head = NULL, *act, *act_prev = NULL; 821 struct tc_action *head = NULL, *act, *act_prev = NULL;
803 822
804 if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) 823 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
805 return -EINVAL; 824 if (ret < 0)
825 return ret;
806 826
807 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 827 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
808 if (tb[0] != NULL && tb[1] == NULL) 828 if (tb[0] != NULL && tb[1] == NULL)
809 return tca_action_flush(tb[0], n, pid); 829 return tca_action_flush(tb[0], n, pid);
810 } 830 }
811 831
812 for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) { 832 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
813 act = tcf_action_get_1(tb[i], n, pid, &ret); 833 act = tcf_action_get_1(tb[i], n, pid);
814 if (act == NULL) 834 if (IS_ERR(act)) {
835 ret = PTR_ERR(act);
815 goto err; 836 goto err;
816 act->order = i+1; 837 }
838 act->order = i;
817 839
818 if (head == NULL) 840 if (head == NULL)
819 head = act; 841 head = act;
@@ -842,7 +864,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
842 864
843 /* now do the delete */ 865 /* now do the delete */
844 tcf_action_destroy(head, 0); 866 tcf_action_destroy(head, 0);
845 ret = rtnetlink_send(skb, pid, RTNLGRP_TC, 867 ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
846 n->nlmsg_flags&NLM_F_ECHO); 868 n->nlmsg_flags&NLM_F_ECHO);
847 if (ret > 0) 869 if (ret > 0)
848 return 0; 870 return 0;
@@ -859,7 +881,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
859 struct tcamsg *t; 881 struct tcamsg *t;
860 struct nlmsghdr *nlh; 882 struct nlmsghdr *nlh;
861 struct sk_buff *skb; 883 struct sk_buff *skb;
862 struct rtattr *x; 884 struct nlattr *nest;
863 unsigned char *b; 885 unsigned char *b;
864 int err = 0; 886 int err = 0;
865 887
@@ -875,23 +897,24 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
875 t->tca__pad1 = 0; 897 t->tca__pad1 = 0;
876 t->tca__pad2 = 0; 898 t->tca__pad2 = 0;
877 899
878 x = (struct rtattr *)skb_tail_pointer(skb); 900 nest = nla_nest_start(skb, TCA_ACT_TAB);
879 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 901 if (nest == NULL)
902 goto nla_put_failure;
880 903
881 if (tcf_action_dump(skb, a, 0, 0) < 0) 904 if (tcf_action_dump(skb, a, 0, 0) < 0)
882 goto rtattr_failure; 905 goto nla_put_failure;
883 906
884 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 907 nla_nest_end(skb, nest);
885 908
886 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 909 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
887 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 910 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
888 911
889 err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 912 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
890 if (err > 0) 913 if (err > 0)
891 err = 0; 914 err = 0;
892 return err; 915 return err;
893 916
894rtattr_failure: 917nla_put_failure:
895nlmsg_failure: 918nlmsg_failure:
896 kfree_skb(skb); 919 kfree_skb(skb);
897 return -1; 920 return -1;
@@ -899,16 +922,20 @@ nlmsg_failure:
899 922
900 923
901static int 924static int
902tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int ovr) 925tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
903{ 926{
904 int ret = 0; 927 int ret = 0;
905 struct tc_action *act; 928 struct tc_action *act;
906 struct tc_action *a; 929 struct tc_action *a;
907 u32 seq = n->nlmsg_seq; 930 u32 seq = n->nlmsg_seq;
908 931
909 act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret); 932 act = tcf_action_init(nla, NULL, NULL, ovr, 0);
910 if (act == NULL) 933 if (act == NULL)
911 goto done; 934 goto done;
935 if (IS_ERR(act)) {
936 ret = PTR_ERR(act);
937 goto done;
938 }
912 939
913 /* dump then free all the actions after update; inserted policy 940 /* dump then free all the actions after update; inserted policy
914 * stays intact 941 * stays intact
@@ -924,11 +951,19 @@ done:
924 951
925static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 952static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
926{ 953{
927 struct rtattr **tca = arg; 954 struct net *net = skb->sk->sk_net;
955 struct nlattr *tca[TCA_ACT_MAX + 1];
928 u32 pid = skb ? NETLINK_CB(skb).pid : 0; 956 u32 pid = skb ? NETLINK_CB(skb).pid : 0;
929 int ret = 0, ovr = 0; 957 int ret = 0, ovr = 0;
930 958
931 if (tca[TCA_ACT_TAB-1] == NULL) { 959 if (net != &init_net)
960 return -EINVAL;
961
962 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
963 if (ret < 0)
964 return ret;
965
966 if (tca[TCA_ACT_TAB] == NULL) {
932 printk("tc_ctl_action: received NO action attribs\n"); 967 printk("tc_ctl_action: received NO action attribs\n");
933 return -EINVAL; 968 return -EINVAL;
934 } 969 }
@@ -946,15 +981,15 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
946 if (n->nlmsg_flags&NLM_F_REPLACE) 981 if (n->nlmsg_flags&NLM_F_REPLACE)
947 ovr = 1; 982 ovr = 1;
948replay: 983replay:
949 ret = tcf_action_add(tca[TCA_ACT_TAB-1], n, pid, ovr); 984 ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr);
950 if (ret == -EAGAIN) 985 if (ret == -EAGAIN)
951 goto replay; 986 goto replay;
952 break; 987 break;
953 case RTM_DELACTION: 988 case RTM_DELACTION:
954 ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_DELACTION); 989 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION);
955 break; 990 break;
956 case RTM_GETACTION: 991 case RTM_GETACTION:
957 ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_GETACTION); 992 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION);
958 break; 993 break;
959 default: 994 default:
960 BUG(); 995 BUG();
@@ -963,33 +998,30 @@ replay:
963 return ret; 998 return ret;
964} 999}
965 1000
966static struct rtattr * 1001static struct nlattr *
967find_dump_kind(struct nlmsghdr *n) 1002find_dump_kind(struct nlmsghdr *n)
968{ 1003{
969 struct rtattr *tb1, *tb2[TCA_ACT_MAX+1]; 1004 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
970 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1]; 1005 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
971 struct rtattr *rta[TCAA_MAX + 1]; 1006 struct nlattr *nla[TCAA_MAX + 1];
972 struct rtattr *kind; 1007 struct nlattr *kind;
973 int min_len = NLMSG_LENGTH(sizeof(struct tcamsg)); 1008
974 int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len); 1009 if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
975 struct rtattr *attr = (void *) n + NLMSG_ALIGN(min_len);
976
977 if (rtattr_parse(rta, TCAA_MAX, attr, attrlen) < 0)
978 return NULL; 1010 return NULL;
979 tb1 = rta[TCA_ACT_TAB - 1]; 1011 tb1 = nla[TCA_ACT_TAB];
980 if (tb1 == NULL) 1012 if (tb1 == NULL)
981 return NULL; 1013 return NULL;
982 1014
983 if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(tb1), 1015 if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
984 NLMSG_ALIGN(RTA_PAYLOAD(tb1))) < 0) 1016 NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
985 return NULL;
986 if (tb[0] == NULL)
987 return NULL; 1017 return NULL;
988 1018
989 if (rtattr_parse(tb2, TCA_ACT_MAX, RTA_DATA(tb[0]), 1019 if (tb[1] == NULL)
990 RTA_PAYLOAD(tb[0])) < 0)
991 return NULL; 1020 return NULL;
992 kind = tb2[TCA_ACT_KIND-1]; 1021 if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
1022 nla_len(tb[1]), NULL) < 0)
1023 return NULL;
1024 kind = tb2[TCA_ACT_KIND];
993 1025
994 return kind; 1026 return kind;
995} 1027}
@@ -997,14 +1029,18 @@ find_dump_kind(struct nlmsghdr *n)
997static int 1029static int
998tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1030tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
999{ 1031{
1032 struct net *net = skb->sk->sk_net;
1000 struct nlmsghdr *nlh; 1033 struct nlmsghdr *nlh;
1001 unsigned char *b = skb_tail_pointer(skb); 1034 unsigned char *b = skb_tail_pointer(skb);
1002 struct rtattr *x; 1035 struct nlattr *nest;
1003 struct tc_action_ops *a_o; 1036 struct tc_action_ops *a_o;
1004 struct tc_action a; 1037 struct tc_action a;
1005 int ret = 0; 1038 int ret = 0;
1006 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); 1039 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
1007 struct rtattr *kind = find_dump_kind(cb->nlh); 1040 struct nlattr *kind = find_dump_kind(cb->nlh);
1041
1042 if (net != &init_net)
1043 return 0;
1008 1044
1009 if (kind == NULL) { 1045 if (kind == NULL) {
1010 printk("tc_dump_action: action bad kind\n"); 1046 printk("tc_dump_action: action bad kind\n");
@@ -1021,7 +1057,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1021 1057
1022 if (a_o->walk == NULL) { 1058 if (a_o->walk == NULL) {
1023 printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); 1059 printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
1024 goto rtattr_failure; 1060 goto nla_put_failure;
1025 } 1061 }
1026 1062
1027 nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 1063 nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
@@ -1031,18 +1067,19 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1031 t->tca__pad1 = 0; 1067 t->tca__pad1 = 0;
1032 t->tca__pad2 = 0; 1068 t->tca__pad2 = 0;
1033 1069
1034 x = (struct rtattr *)skb_tail_pointer(skb); 1070 nest = nla_nest_start(skb, TCA_ACT_TAB);
1035 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 1071 if (nest == NULL)
1072 goto nla_put_failure;
1036 1073
1037 ret = a_o->walk(skb, cb, RTM_GETACTION, &a); 1074 ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
1038 if (ret < 0) 1075 if (ret < 0)
1039 goto rtattr_failure; 1076 goto nla_put_failure;
1040 1077
1041 if (ret > 0) { 1078 if (ret > 0) {
1042 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 1079 nla_nest_end(skb, nest);
1043 ret = skb->len; 1080 ret = skb->len;
1044 } else 1081 } else
1045 nlmsg_trim(skb, x); 1082 nla_nest_cancel(skb, nest);
1046 1083
1047 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1084 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1048 if (NETLINK_CB(cb->skb).pid && ret) 1085 if (NETLINK_CB(cb->skb).pid && ret)
@@ -1050,7 +1087,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1050 module_put(a_o->owner); 1087 module_put(a_o->owner);
1051 return skb->len; 1088 return skb->len;
1052 1089
1053rtattr_failure: 1090nla_put_failure:
1054nlmsg_failure: 1091nlmsg_failure:
1055 module_put(a_o->owner); 1092 module_put(a_o->owner);
1056 nlmsg_trim(skb, b); 1093 nlmsg_trim(skb, b);
@@ -1067,8 +1104,3 @@ static int __init tc_action_init(void)
1067} 1104}
1068 1105
1069subsys_initcall(tc_action_init); 1106subsys_initcall(tc_action_init);
1070
1071EXPORT_SYMBOL(tcf_register_action);
1072EXPORT_SYMBOL(tcf_unregister_action);
1073EXPORT_SYMBOL(tcf_action_exec);
1074EXPORT_SYMBOL(tcf_action_dump_1);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a9631e426d91..422872c4f14b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,28 +53,34 @@ typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
57 [TCA_GACT_PARMS] = { .len = sizeof(struct tc_gact) },
58 [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) },
59};
60
61static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
57 struct tc_action *a, int ovr, int bind) 62 struct tc_action *a, int ovr, int bind)
58{ 63{
59 struct rtattr *tb[TCA_GACT_MAX]; 64 struct nlattr *tb[TCA_GACT_MAX + 1];
60 struct tc_gact *parm; 65 struct tc_gact *parm;
61 struct tcf_gact *gact; 66 struct tcf_gact *gact;
62 struct tcf_common *pc; 67 struct tcf_common *pc;
63 int ret = 0; 68 int ret = 0;
69 int err;
64 70
65 if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) 71 if (nla == NULL)
66 return -EINVAL; 72 return -EINVAL;
67 73
68 if (tb[TCA_GACT_PARMS - 1] == NULL || 74 err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
69 RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm)) 75 if (err < 0)
76 return err;
77
78 if (tb[TCA_GACT_PARMS] == NULL)
70 return -EINVAL; 79 return -EINVAL;
71 parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]); 80 parm = nla_data(tb[TCA_GACT_PARMS]);
72 81
73 if (tb[TCA_GACT_PROB-1] != NULL) 82#ifndef CONFIG_GACT_PROB
74#ifdef CONFIG_GACT_PROB 83 if (tb[TCA_GACT_PROB] != NULL)
75 if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
76 return -EINVAL;
77#else
78 return -EOPNOTSUPP; 84 return -EOPNOTSUPP;
79#endif 85#endif
80 86
@@ -97,8 +103,8 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
97 spin_lock_bh(&gact->tcf_lock); 103 spin_lock_bh(&gact->tcf_lock);
98 gact->tcf_action = parm->action; 104 gact->tcf_action = parm->action;
99#ifdef CONFIG_GACT_PROB 105#ifdef CONFIG_GACT_PROB
100 if (tb[TCA_GACT_PROB-1] != NULL) { 106 if (tb[TCA_GACT_PROB] != NULL) {
101 struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); 107 struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
102 gact->tcfg_paction = p_parm->paction; 108 gact->tcfg_paction = p_parm->paction;
103 gact->tcfg_pval = p_parm->pval; 109 gact->tcfg_pval = p_parm->pval;
104 gact->tcfg_ptype = p_parm->ptype; 110 gact->tcfg_ptype = p_parm->ptype;
@@ -154,23 +160,23 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
154 opt.refcnt = gact->tcf_refcnt - ref; 160 opt.refcnt = gact->tcf_refcnt - ref;
155 opt.bindcnt = gact->tcf_bindcnt - bind; 161 opt.bindcnt = gact->tcf_bindcnt - bind;
156 opt.action = gact->tcf_action; 162 opt.action = gact->tcf_action;
157 RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); 163 NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
158#ifdef CONFIG_GACT_PROB 164#ifdef CONFIG_GACT_PROB
159 if (gact->tcfg_ptype) { 165 if (gact->tcfg_ptype) {
160 struct tc_gact_p p_opt; 166 struct tc_gact_p p_opt;
161 p_opt.paction = gact->tcfg_paction; 167 p_opt.paction = gact->tcfg_paction;
162 p_opt.pval = gact->tcfg_pval; 168 p_opt.pval = gact->tcfg_pval;
163 p_opt.ptype = gact->tcfg_ptype; 169 p_opt.ptype = gact->tcfg_ptype;
164 RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); 170 NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
165 } 171 }
166#endif 172#endif
167 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); 173 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
168 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); 174 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
169 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); 175 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
170 RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); 176 NLA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
171 return skb->len; 177 return skb->len;
172 178
173rtattr_failure: 179nla_put_failure:
174 nlmsg_trim(skb, b); 180 nlmsg_trim(skb, b);
175 return -1; 181 return -1;
176} 182}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index fa006e06ce33..da696fd3e341 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -92,10 +92,17 @@ static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
92 return ret; 92 return ret;
93} 93}
94 94
95static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, 95static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
96 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
97 [TCA_IPT_HOOK] = { .type = NLA_U32 },
98 [TCA_IPT_INDEX] = { .type = NLA_U32 },
99 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) },
100};
101
102static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
96 struct tc_action *a, int ovr, int bind) 103 struct tc_action *a, int ovr, int bind)
97{ 104{
98 struct rtattr *tb[TCA_IPT_MAX]; 105 struct nlattr *tb[TCA_IPT_MAX + 1];
99 struct tcf_ipt *ipt; 106 struct tcf_ipt *ipt;
100 struct tcf_common *pc; 107 struct tcf_common *pc;
101 struct ipt_entry_target *td, *t; 108 struct ipt_entry_target *td, *t;
@@ -104,22 +111,24 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
104 u32 hook = 0; 111 u32 hook = 0;
105 u32 index = 0; 112 u32 index = 0;
106 113
107 if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0) 114 if (nla == NULL)
108 return -EINVAL; 115 return -EINVAL;
109 116
110 if (tb[TCA_IPT_HOOK-1] == NULL || 117 err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
111 RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32)) 118 if (err < 0)
119 return err;
120
121 if (tb[TCA_IPT_HOOK] == NULL)
112 return -EINVAL; 122 return -EINVAL;
113 if (tb[TCA_IPT_TARG-1] == NULL || 123 if (tb[TCA_IPT_TARG] == NULL)
114 RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
115 return -EINVAL; 124 return -EINVAL;
116 td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]); 125
117 if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size) 126 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
127 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
118 return -EINVAL; 128 return -EINVAL;
119 129
120 if (tb[TCA_IPT_INDEX-1] != NULL && 130 if (tb[TCA_IPT_INDEX] != NULL)
121 RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) 131 index = nla_get_u32(tb[TCA_IPT_INDEX]);
122 index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
123 132
124 pc = tcf_hash_check(index, a, bind, &ipt_hash_info); 133 pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
125 if (!pc) { 134 if (!pc) {
@@ -136,14 +145,14 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
136 } 145 }
137 ipt = to_ipt(pc); 146 ipt = to_ipt(pc);
138 147
139 hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); 148 hook = nla_get_u32(tb[TCA_IPT_HOOK]);
140 149
141 err = -ENOMEM; 150 err = -ENOMEM;
142 tname = kmalloc(IFNAMSIZ, GFP_KERNEL); 151 tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
143 if (unlikely(!tname)) 152 if (unlikely(!tname))
144 goto err1; 153 goto err1;
145 if (tb[TCA_IPT_TABLE - 1] == NULL || 154 if (tb[TCA_IPT_TABLE] == NULL ||
146 rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) 155 nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
147 strcpy(tname, "mangle"); 156 strcpy(tname, "mangle");
148 157
149 t = kmemdup(td, td->u.target_size, GFP_KERNEL); 158 t = kmemdup(td, td->u.target_size, GFP_KERNEL);
@@ -243,25 +252,25 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
243 252
244 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 253 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
245 if (unlikely(!t)) 254 if (unlikely(!t))
246 goto rtattr_failure; 255 goto nla_put_failure;
247 256
248 c.bindcnt = ipt->tcf_bindcnt - bind; 257 c.bindcnt = ipt->tcf_bindcnt - bind;
249 c.refcnt = ipt->tcf_refcnt - ref; 258 c.refcnt = ipt->tcf_refcnt - ref;
250 strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); 259 strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
251 260
252 RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); 261 NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
253 RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); 262 NLA_PUT_U32(skb, TCA_IPT_INDEX, ipt->tcf_index);
254 RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); 263 NLA_PUT_U32(skb, TCA_IPT_HOOK, ipt->tcfi_hook);
255 RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); 264 NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
256 RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); 265 NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
257 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); 266 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
258 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); 267 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
259 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); 268 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
260 RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); 269 NLA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
261 kfree(t); 270 kfree(t);
262 return skb->len; 271 return skb->len;
263 272
264rtattr_failure: 273nla_put_failure:
265 nlmsg_trim(skb, b); 274 nlmsg_trim(skb, b);
266 kfree(t); 275 kfree(t);
267 return -1; 276 return -1;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c3fde9180f9d..1aff005d95cd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -54,24 +54,31 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
54 return 0; 54 return 0;
55} 55}
56 56
57static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, 57static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
58 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
59};
60
61static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
58 struct tc_action *a, int ovr, int bind) 62 struct tc_action *a, int ovr, int bind)
59{ 63{
60 struct rtattr *tb[TCA_MIRRED_MAX]; 64 struct nlattr *tb[TCA_MIRRED_MAX + 1];
61 struct tc_mirred *parm; 65 struct tc_mirred *parm;
62 struct tcf_mirred *m; 66 struct tcf_mirred *m;
63 struct tcf_common *pc; 67 struct tcf_common *pc;
64 struct net_device *dev = NULL; 68 struct net_device *dev = NULL;
65 int ret = 0; 69 int ret = 0, err;
66 int ok_push = 0; 70 int ok_push = 0;
67 71
68 if (rta == NULL || rtattr_parse_nested(tb, TCA_MIRRED_MAX, rta) < 0) 72 if (nla == NULL)
69 return -EINVAL; 73 return -EINVAL;
70 74
71 if (tb[TCA_MIRRED_PARMS-1] == NULL || 75 err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
72 RTA_PAYLOAD(tb[TCA_MIRRED_PARMS-1]) < sizeof(*parm)) 76 if (err < 0)
77 return err;
78
79 if (tb[TCA_MIRRED_PARMS] == NULL)
73 return -EINVAL; 80 return -EINVAL;
74 parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); 81 parm = nla_data(tb[TCA_MIRRED_PARMS]);
75 82
76 if (parm->ifindex) { 83 if (parm->ifindex) {
77 dev = __dev_get_by_index(&init_net, parm->ifindex); 84 dev = __dev_get_by_index(&init_net, parm->ifindex);
@@ -207,14 +214,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
207 opt.bindcnt = m->tcf_bindcnt - bind; 214 opt.bindcnt = m->tcf_bindcnt - bind;
208 opt.eaction = m->tcfm_eaction; 215 opt.eaction = m->tcfm_eaction;
209 opt.ifindex = m->tcfm_ifindex; 216 opt.ifindex = m->tcfm_ifindex;
210 RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); 217 NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
211 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); 218 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
212 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); 219 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
213 t.expires = jiffies_to_clock_t(m->tcf_tm.expires); 220 t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
214 RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); 221 NLA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
215 return skb->len; 222 return skb->len;
216 223
217rtattr_failure: 224nla_put_failure:
218 nlmsg_trim(skb, b); 225 nlmsg_trim(skb, b);
219 return -1; 226 return -1;
220} 227}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c96273bcaf9c..0a3c8339767a 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -40,22 +40,29 @@ static struct tcf_hashinfo nat_hash_info = {
40 .lock = &nat_lock, 40 .lock = &nat_lock,
41}; 41};
42 42
43static int tcf_nat_init(struct rtattr *rta, struct rtattr *est, 43static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
44 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
45};
46
47static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
44 struct tc_action *a, int ovr, int bind) 48 struct tc_action *a, int ovr, int bind)
45{ 49{
46 struct rtattr *tb[TCA_NAT_MAX]; 50 struct nlattr *tb[TCA_NAT_MAX + 1];
47 struct tc_nat *parm; 51 struct tc_nat *parm;
48 int ret = 0; 52 int ret = 0, err;
49 struct tcf_nat *p; 53 struct tcf_nat *p;
50 struct tcf_common *pc; 54 struct tcf_common *pc;
51 55
52 if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0) 56 if (nla == NULL)
53 return -EINVAL; 57 return -EINVAL;
54 58
55 if (tb[TCA_NAT_PARMS - 1] == NULL || 59 err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
56 RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm)) 60 if (err < 0)
61 return err;
62
63 if (tb[TCA_NAT_PARMS] == NULL)
57 return -EINVAL; 64 return -EINVAL;
58 parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]); 65 parm = nla_data(tb[TCA_NAT_PARMS]);
59 66
60 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); 67 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
61 if (!pc) { 68 if (!pc) {
@@ -151,7 +158,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
151 else 158 else
152 iph->daddr = new_addr; 159 iph->daddr = new_addr;
153 160
154 nf_csum_replace4(&iph->check, addr, new_addr); 161 csum_replace4(&iph->check, addr, new_addr);
155 } 162 }
156 163
157 ihl = iph->ihl * 4; 164 ihl = iph->ihl * 4;
@@ -169,7 +176,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
169 goto drop; 176 goto drop;
170 177
171 tcph = (void *)(skb_network_header(skb) + ihl); 178 tcph = (void *)(skb_network_header(skb) + ihl);
172 nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 179 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
173 break; 180 break;
174 } 181 }
175 case IPPROTO_UDP: 182 case IPPROTO_UDP:
@@ -184,8 +191,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
184 191
185 udph = (void *)(skb_network_header(skb) + ihl); 192 udph = (void *)(skb_network_header(skb) + ihl);
186 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 193 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
187 nf_proto_csum_replace4(&udph->check, skb, addr, 194 inet_proto_csum_replace4(&udph->check, skb, addr,
188 new_addr, 1); 195 new_addr, 1);
189 if (!udph->check) 196 if (!udph->check)
190 udph->check = CSUM_MANGLED_0; 197 udph->check = CSUM_MANGLED_0;
191 } 198 }
@@ -232,8 +239,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
232 else 239 else
233 iph->saddr = new_addr; 240 iph->saddr = new_addr;
234 241
235 nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 242 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
236 1); 243 1);
237 break; 244 break;
238 } 245 }
239 default: 246 default:
@@ -275,17 +282,17 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
275 opt->refcnt = p->tcf_refcnt - ref; 282 opt->refcnt = p->tcf_refcnt - ref;
276 opt->bindcnt = p->tcf_bindcnt - bind; 283 opt->bindcnt = p->tcf_bindcnt - bind;
277 284
278 RTA_PUT(skb, TCA_NAT_PARMS, s, opt); 285 NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
279 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 286 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
280 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 287 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
281 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 288 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
282 RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 289 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
283 290
284 kfree(opt); 291 kfree(opt);
285 292
286 return skb->len; 293 return skb->len;
287 294
288rtattr_failure: 295nla_put_failure:
289 nlmsg_trim(skb, b); 296 nlmsg_trim(skb, b);
290 kfree(opt); 297 kfree(opt);
291 return -1; 298 return -1;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b46fab5fb323..3cc4cb9e500e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,26 +33,33 @@ static struct tcf_hashinfo pedit_hash_info = {
33 .lock = &pedit_lock, 33 .lock = &pedit_lock,
34}; 34};
35 35
36static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, 36static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
37 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tcf_pedit) },
38};
39
40static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
37 struct tc_action *a, int ovr, int bind) 41 struct tc_action *a, int ovr, int bind)
38{ 42{
39 struct rtattr *tb[TCA_PEDIT_MAX]; 43 struct nlattr *tb[TCA_PEDIT_MAX + 1];
40 struct tc_pedit *parm; 44 struct tc_pedit *parm;
41 int ret = 0; 45 int ret = 0, err;
42 struct tcf_pedit *p; 46 struct tcf_pedit *p;
43 struct tcf_common *pc; 47 struct tcf_common *pc;
44 struct tc_pedit_key *keys = NULL; 48 struct tc_pedit_key *keys = NULL;
45 int ksize; 49 int ksize;
46 50
47 if (rta == NULL || rtattr_parse_nested(tb, TCA_PEDIT_MAX, rta) < 0) 51 if (nla == NULL)
48 return -EINVAL; 52 return -EINVAL;
49 53
50 if (tb[TCA_PEDIT_PARMS - 1] == NULL || 54 err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
51 RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm)) 55 if (err < 0)
56 return err;
57
58 if (tb[TCA_PEDIT_PARMS] == NULL)
52 return -EINVAL; 59 return -EINVAL;
53 parm = RTA_DATA(tb[TCA_PEDIT_PARMS-1]); 60 parm = nla_data(tb[TCA_PEDIT_PARMS]);
54 ksize = parm->nkeys * sizeof(struct tc_pedit_key); 61 ksize = parm->nkeys * sizeof(struct tc_pedit_key);
55 if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) 62 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
56 return -EINVAL; 63 return -EINVAL;
57 64
58 pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); 65 pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
@@ -206,15 +213,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
206 opt->refcnt = p->tcf_refcnt - ref; 213 opt->refcnt = p->tcf_refcnt - ref;
207 opt->bindcnt = p->tcf_bindcnt - bind; 214 opt->bindcnt = p->tcf_bindcnt - bind;
208 215
209 RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); 216 NLA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
210 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 217 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
211 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 218 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
212 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 219 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
213 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); 220 NLA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
214 kfree(opt); 221 kfree(opt);
215 return skb->len; 222 return skb->len;
216 223
217rtattr_failure: 224nla_put_failure:
218 nlmsg_trim(skb, b); 225 nlmsg_trim(skb, b);
219 kfree(opt); 226 kfree(opt);
220 return -1; 227 return -1;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a73e3e6d87ea..0898120bbcc0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -54,7 +54,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
54{ 54{
55 struct tcf_common *p; 55 struct tcf_common *p;
56 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 56 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
57 struct rtattr *r; 57 struct nlattr *nest;
58 58
59 read_lock_bh(&police_lock); 59 read_lock_bh(&police_lock);
60 60
@@ -69,18 +69,19 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
69 continue; 69 continue;
70 a->priv = p; 70 a->priv = p;
71 a->order = index; 71 a->order = index;
72 r = (struct rtattr *)skb_tail_pointer(skb); 72 nest = nla_nest_start(skb, a->order);
73 RTA_PUT(skb, a->order, 0, NULL); 73 if (nest == NULL)
74 goto nla_put_failure;
74 if (type == RTM_DELACTION) 75 if (type == RTM_DELACTION)
75 err = tcf_action_dump_1(skb, a, 0, 1); 76 err = tcf_action_dump_1(skb, a, 0, 1);
76 else 77 else
77 err = tcf_action_dump_1(skb, a, 0, 0); 78 err = tcf_action_dump_1(skb, a, 0, 0);
78 if (err < 0) { 79 if (err < 0) {
79 index--; 80 index--;
80 nlmsg_trim(skb, r); 81 nla_nest_cancel(skb, nest);
81 goto done; 82 goto done;
82 } 83 }
83 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 84 nla_nest_end(skb, nest);
84 n_i++; 85 n_i++;
85 } 86 }
86 } 87 }
@@ -90,8 +91,8 @@ done:
90 cb->args[0] += n_i; 91 cb->args[0] += n_i;
91 return n_i; 92 return n_i;
92 93
93rtattr_failure: 94nla_put_failure:
94 nlmsg_trim(skb, r); 95 nla_nest_cancel(skb, nest);
95 goto done; 96 goto done;
96} 97}
97 98
@@ -118,33 +119,37 @@ static void tcf_police_destroy(struct tcf_police *p)
118 BUG_TRAP(0); 119 BUG_TRAP(0);
119} 120}
120 121
121static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 122static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
123 [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
124 [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
125 [TCA_POLICE_AVRATE] = { .type = NLA_U32 },
126 [TCA_POLICE_RESULT] = { .type = NLA_U32 },
127};
128
129static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
122 struct tc_action *a, int ovr, int bind) 130 struct tc_action *a, int ovr, int bind)
123{ 131{
124 unsigned h; 132 unsigned h;
125 int ret = 0, err; 133 int ret = 0, err;
126 struct rtattr *tb[TCA_POLICE_MAX]; 134 struct nlattr *tb[TCA_POLICE_MAX + 1];
127 struct tc_police *parm; 135 struct tc_police *parm;
128 struct tcf_police *police; 136 struct tcf_police *police;
129 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 137 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
130 int size; 138 int size;
131 139
132 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 140 if (nla == NULL)
133 return -EINVAL; 141 return -EINVAL;
134 142
135 if (tb[TCA_POLICE_TBF-1] == NULL) 143 err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
136 return -EINVAL; 144 if (err < 0)
137 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 145 return err;
138 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
139 return -EINVAL;
140 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
141 146
142 if (tb[TCA_POLICE_RESULT-1] != NULL && 147 if (tb[TCA_POLICE_TBF] == NULL)
143 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
144 return -EINVAL; 148 return -EINVAL;
145 if (tb[TCA_POLICE_RESULT-1] != NULL && 149 size = nla_len(tb[TCA_POLICE_TBF]);
146 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 150 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
147 return -EINVAL; 151 return -EINVAL;
152 parm = nla_data(tb[TCA_POLICE_TBF]);
148 153
149 if (parm->index) { 154 if (parm->index) {
150 struct tcf_common *pc; 155 struct tcf_common *pc;
@@ -174,12 +179,12 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
174override: 179override:
175 if (parm->rate.rate) { 180 if (parm->rate.rate) {
176 err = -ENOMEM; 181 err = -ENOMEM;
177 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 182 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
178 if (R_tab == NULL) 183 if (R_tab == NULL)
179 goto failure; 184 goto failure;
180 if (parm->peakrate.rate) { 185 if (parm->peakrate.rate) {
181 P_tab = qdisc_get_rtab(&parm->peakrate, 186 P_tab = qdisc_get_rtab(&parm->peakrate,
182 tb[TCA_POLICE_PEAKRATE-1]); 187 tb[TCA_POLICE_PEAKRATE]);
183 if (P_tab == NULL) { 188 if (P_tab == NULL) {
184 qdisc_put_rtab(R_tab); 189 qdisc_put_rtab(R_tab);
185 goto failure; 190 goto failure;
@@ -197,8 +202,8 @@ override:
197 police->tcfp_P_tab = P_tab; 202 police->tcfp_P_tab = P_tab;
198 } 203 }
199 204
200 if (tb[TCA_POLICE_RESULT-1]) 205 if (tb[TCA_POLICE_RESULT])
201 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 206 police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
202 police->tcfp_toks = police->tcfp_burst = parm->burst; 207 police->tcfp_toks = police->tcfp_burst = parm->burst;
203 police->tcfp_mtu = parm->mtu; 208 police->tcfp_mtu = parm->mtu;
204 if (police->tcfp_mtu == 0) { 209 if (police->tcfp_mtu == 0) {
@@ -210,9 +215,8 @@ override:
210 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 215 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
211 police->tcf_action = parm->action; 216 police->tcf_action = parm->action;
212 217
213 if (tb[TCA_POLICE_AVRATE-1]) 218 if (tb[TCA_POLICE_AVRATE])
214 police->tcfp_ewma_rate = 219 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
215 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
216 if (est) 220 if (est)
217 gen_replace_estimator(&police->tcf_bstats, 221 gen_replace_estimator(&police->tcf_bstats,
218 &police->tcf_rate_est, 222 &police->tcf_rate_est,
@@ -332,15 +336,14 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
332 opt.peakrate = police->tcfp_P_tab->rate; 336 opt.peakrate = police->tcfp_P_tab->rate;
333 else 337 else
334 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 338 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
335 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 339 NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
336 if (police->tcfp_result) 340 if (police->tcfp_result)
337 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 341 NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
338 &police->tcfp_result);
339 if (police->tcfp_ewma_rate) 342 if (police->tcfp_ewma_rate)
340 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 343 NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
341 return skb->len; 344 return skb->len;
342 345
343rtattr_failure: 346nla_put_failure:
344 nlmsg_trim(skb, b); 347 nlmsg_trim(skb, b);
345 return -1; 348 return -1;
346} 349}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index fb84ef33d14f..fbde461b716c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -84,30 +84,37 @@ static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
84 return alloc_defdata(d, datalen, defdata); 84 return alloc_defdata(d, datalen, defdata);
85} 85}
86 86
87static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, 87static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
88 [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
89};
90
91static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
88 struct tc_action *a, int ovr, int bind) 92 struct tc_action *a, int ovr, int bind)
89{ 93{
90 struct rtattr *tb[TCA_DEF_MAX]; 94 struct nlattr *tb[TCA_DEF_MAX + 1];
91 struct tc_defact *parm; 95 struct tc_defact *parm;
92 struct tcf_defact *d; 96 struct tcf_defact *d;
93 struct tcf_common *pc; 97 struct tcf_common *pc;
94 void *defdata; 98 void *defdata;
95 u32 datalen = 0; 99 u32 datalen = 0;
96 int ret = 0; 100 int ret = 0, err;
97 101
98 if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) 102 if (nla == NULL)
99 return -EINVAL; 103 return -EINVAL;
100 104
101 if (tb[TCA_DEF_PARMS - 1] == NULL || 105 err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL);
102 RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) 106 if (err < 0)
107 return err;
108
109 if (tb[TCA_DEF_PARMS] == NULL)
103 return -EINVAL; 110 return -EINVAL;
104 111
105 parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); 112 parm = nla_data(tb[TCA_DEF_PARMS]);
106 defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); 113 defdata = nla_data(tb[TCA_DEF_DATA]);
107 if (defdata == NULL) 114 if (defdata == NULL)
108 return -EINVAL; 115 return -EINVAL;
109 116
110 datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); 117 datalen = nla_len(tb[TCA_DEF_DATA]);
111 if (datalen <= 0) 118 if (datalen <= 0)
112 return -EINVAL; 119 return -EINVAL;
113 120
@@ -164,15 +171,15 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
164 opt.refcnt = d->tcf_refcnt - ref; 171 opt.refcnt = d->tcf_refcnt - ref;
165 opt.bindcnt = d->tcf_bindcnt - bind; 172 opt.bindcnt = d->tcf_bindcnt - bind;
166 opt.action = d->tcf_action; 173 opt.action = d->tcf_action;
167 RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); 174 NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
168 RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); 175 NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
169 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); 176 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
170 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); 177 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
171 t.expires = jiffies_to_clock_t(d->tcf_tm.expires); 178 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
172 RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); 179 NLA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
173 return skb->len; 180 return skb->len;
174 181
175rtattr_failure: 182nla_put_failure:
176 nlmsg_trim(skb, b); 183 nlmsg_trim(skb, b);
177 return -1; 184 return -1;
178} 185}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 03657976fd50..0fbedcabf111 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,33 +23,30 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/kmod.h> 24#include <linux/kmod.h>
25#include <linux/netlink.h> 25#include <linux/netlink.h>
26#include <linux/err.h>
27#include <net/net_namespace.h>
28#include <net/sock.h>
26#include <net/netlink.h> 29#include <net/netlink.h>
27#include <net/pkt_sched.h> 30#include <net/pkt_sched.h>
28#include <net/pkt_cls.h> 31#include <net/pkt_cls.h>
29 32
30#if 0 /* control */
31#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
32#else
33#define DPRINTK(format,args...)
34#endif
35
36/* The list of all installed classifier types */ 33/* The list of all installed classifier types */
37 34
38static struct tcf_proto_ops *tcf_proto_base; 35static struct tcf_proto_ops *tcf_proto_base __read_mostly;
39 36
40/* Protects list of registered TC modules. It is pure SMP lock. */ 37/* Protects list of registered TC modules. It is pure SMP lock. */
41static DEFINE_RWLOCK(cls_mod_lock); 38static DEFINE_RWLOCK(cls_mod_lock);
42 39
43/* Find classifier type by string name */ 40/* Find classifier type by string name */
44 41
45static struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind) 42static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
46{ 43{
47 struct tcf_proto_ops *t = NULL; 44 struct tcf_proto_ops *t = NULL;
48 45
49 if (kind) { 46 if (kind) {
50 read_lock(&cls_mod_lock); 47 read_lock(&cls_mod_lock);
51 for (t = tcf_proto_base; t; t = t->next) { 48 for (t = tcf_proto_base; t; t = t->next) {
52 if (rtattr_strcmp(kind, t->kind) == 0) { 49 if (nla_strcmp(kind, t->kind) == 0) {
53 if (!try_module_get(t->owner)) 50 if (!try_module_get(t->owner))
54 t = NULL; 51 t = NULL;
55 break; 52 break;
@@ -79,6 +76,7 @@ out:
79 write_unlock(&cls_mod_lock); 76 write_unlock(&cls_mod_lock);
80 return rc; 77 return rc;
81} 78}
79EXPORT_SYMBOL(register_tcf_proto_ops);
82 80
83int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 81int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
84{ 82{
@@ -98,6 +96,7 @@ out:
98 write_unlock(&cls_mod_lock); 96 write_unlock(&cls_mod_lock);
99 return rc; 97 return rc;
100} 98}
99EXPORT_SYMBOL(unregister_tcf_proto_ops);
101 100
102static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 101static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
103 struct tcf_proto *tp, unsigned long fh, int event); 102 struct tcf_proto *tp, unsigned long fh, int event);
@@ -105,9 +104,9 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
105 104
106/* Select new prio value from the range, managed by kernel. */ 105/* Select new prio value from the range, managed by kernel. */
107 106
108static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp) 107static inline u32 tcf_auto_prio(struct tcf_proto *tp)
109{ 108{
110 u32 first = TC_H_MAKE(0xC0000000U,0U); 109 u32 first = TC_H_MAKE(0xC0000000U, 0U);
111 110
112 if (tp) 111 if (tp)
113 first = tp->prio-1; 112 first = tp->prio-1;
@@ -119,7 +118,8 @@ static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
119 118
120static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 119static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
121{ 120{
122 struct rtattr **tca; 121 struct net *net = skb->sk->sk_net;
122 struct nlattr *tca[TCA_MAX + 1];
123 struct tcmsg *t; 123 struct tcmsg *t;
124 u32 protocol; 124 u32 protocol;
125 u32 prio; 125 u32 prio;
@@ -130,13 +130,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
130 struct tcf_proto **back, **chain; 130 struct tcf_proto **back, **chain;
131 struct tcf_proto *tp; 131 struct tcf_proto *tp;
132 struct tcf_proto_ops *tp_ops; 132 struct tcf_proto_ops *tp_ops;
133 struct Qdisc_class_ops *cops; 133 const struct Qdisc_class_ops *cops;
134 unsigned long cl; 134 unsigned long cl;
135 unsigned long fh; 135 unsigned long fh;
136 int err; 136 int err;
137 137
138 if (net != &init_net)
139 return -EINVAL;
140
138replay: 141replay:
139 tca = arg;
140 t = NLMSG_DATA(n); 142 t = NLMSG_DATA(n);
141 protocol = TC_H_MIN(t->tcm_info); 143 protocol = TC_H_MIN(t->tcm_info);
142 prio = TC_H_MAJ(t->tcm_info); 144 prio = TC_H_MAJ(t->tcm_info);
@@ -148,21 +150,29 @@ replay:
148 /* If no priority is given, user wants we allocated it. */ 150 /* If no priority is given, user wants we allocated it. */
149 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 151 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
150 return -ENOENT; 152 return -ENOENT;
151 prio = TC_H_MAKE(0x80000000U,0U); 153 prio = TC_H_MAKE(0x80000000U, 0U);
152 } 154 }
153 155
154 /* Find head of filter chain. */ 156 /* Find head of filter chain. */
155 157
156 /* Find link */ 158 /* Find link */
157 if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) 159 dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
160 if (dev == NULL)
158 return -ENODEV; 161 return -ENODEV;
159 162
163 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
164 if (err < 0)
165 return err;
166
160 /* Find qdisc */ 167 /* Find qdisc */
161 if (!parent) { 168 if (!parent) {
162 q = dev->qdisc_sleeping; 169 q = dev->qdisc_sleeping;
163 parent = q->handle; 170 parent = q->handle;
164 } else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) 171 } else {
165 return -EINVAL; 172 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
173 if (q == NULL)
174 return -EINVAL;
175 }
166 176
167 /* Is it classful? */ 177 /* Is it classful? */
168 if ((cops = q->ops->cl_ops) == NULL) 178 if ((cops = q->ops->cl_ops) == NULL)
@@ -196,7 +206,7 @@ replay:
196 if (tp == NULL) { 206 if (tp == NULL) {
197 /* Proto-tcf does not exist, create new one */ 207 /* Proto-tcf does not exist, create new one */
198 208
199 if (tca[TCA_KIND-1] == NULL || !protocol) 209 if (tca[TCA_KIND] == NULL || !protocol)
200 goto errout; 210 goto errout;
201 211
202 err = -ENOENT; 212 err = -ENOENT;
@@ -207,17 +217,18 @@ replay:
207 /* Create new proto tcf */ 217 /* Create new proto tcf */
208 218
209 err = -ENOBUFS; 219 err = -ENOBUFS;
210 if ((tp = kzalloc(sizeof(*tp), GFP_KERNEL)) == NULL) 220 tp = kzalloc(sizeof(*tp), GFP_KERNEL);
221 if (tp == NULL)
211 goto errout; 222 goto errout;
212 err = -EINVAL; 223 err = -EINVAL;
213 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); 224 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
214 if (tp_ops == NULL) { 225 if (tp_ops == NULL) {
215#ifdef CONFIG_KMOD 226#ifdef CONFIG_KMOD
216 struct rtattr *kind = tca[TCA_KIND-1]; 227 struct nlattr *kind = tca[TCA_KIND];
217 char name[IFNAMSIZ]; 228 char name[IFNAMSIZ];
218 229
219 if (kind != NULL && 230 if (kind != NULL &&
220 rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 231 nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
221 rtnl_unlock(); 232 rtnl_unlock();
222 request_module("cls_%s", name); 233 request_module("cls_%s", name);
223 rtnl_lock(); 234 rtnl_lock();
@@ -243,7 +254,9 @@ replay:
243 tp->q = q; 254 tp->q = q;
244 tp->classify = tp_ops->classify; 255 tp->classify = tp_ops->classify;
245 tp->classid = parent; 256 tp->classid = parent;
246 if ((err = tp_ops->init(tp)) != 0) { 257
258 err = tp_ops->init(tp);
259 if (err != 0) {
247 module_put(tp_ops->owner); 260 module_put(tp_ops->owner);
248 kfree(tp); 261 kfree(tp);
249 goto errout; 262 goto errout;
@@ -254,7 +267,7 @@ replay:
254 *back = tp; 267 *back = tp;
255 qdisc_unlock_tree(dev); 268 qdisc_unlock_tree(dev);
256 269
257 } else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind)) 270 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
258 goto errout; 271 goto errout;
259 272
260 fh = tp->ops->get(tp, t->tcm_handle); 273 fh = tp->ops->get(tp, t->tcm_handle);
@@ -272,13 +285,14 @@ replay:
272 } 285 }
273 286
274 err = -ENOENT; 287 err = -ENOENT;
275 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 288 if (n->nlmsg_type != RTM_NEWTFILTER ||
289 !(n->nlmsg_flags & NLM_F_CREATE))
276 goto errout; 290 goto errout;
277 } else { 291 } else {
278 switch (n->nlmsg_type) { 292 switch (n->nlmsg_type) {
279 case RTM_NEWTFILTER: 293 case RTM_NEWTFILTER:
280 err = -EEXIST; 294 err = -EEXIST;
281 if (n->nlmsg_flags&NLM_F_EXCL) 295 if (n->nlmsg_flags & NLM_F_EXCL)
282 goto errout; 296 goto errout;
283 break; 297 break;
284 case RTM_DELTFILTER: 298 case RTM_DELTFILTER:
@@ -308,9 +322,8 @@ errout:
308 return err; 322 return err;
309} 323}
310 324
311static int 325static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
312tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 326 unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
313 u32 pid, u32 seq, u16 flags, int event)
314{ 327{
315 struct tcmsg *tcm; 328 struct tcmsg *tcm;
316 struct nlmsghdr *nlh; 329 struct nlmsghdr *nlh;
@@ -324,18 +337,18 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
324 tcm->tcm_ifindex = tp->q->dev->ifindex; 337 tcm->tcm_ifindex = tp->q->dev->ifindex;
325 tcm->tcm_parent = tp->classid; 338 tcm->tcm_parent = tp->classid;
326 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 339 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
327 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind); 340 NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
328 tcm->tcm_handle = fh; 341 tcm->tcm_handle = fh;
329 if (RTM_DELTFILTER != event) { 342 if (RTM_DELTFILTER != event) {
330 tcm->tcm_handle = 0; 343 tcm->tcm_handle = 0;
331 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) 344 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
332 goto rtattr_failure; 345 goto nla_put_failure;
333 } 346 }
334 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 347 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
335 return skb->len; 348 return skb->len;
336 349
337nlmsg_failure: 350nlmsg_failure:
338rtattr_failure: 351nla_put_failure:
339 nlmsg_trim(skb, b); 352 nlmsg_trim(skb, b);
340 return -1; 353 return -1;
341} 354}
@@ -355,19 +368,20 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
355 return -EINVAL; 368 return -EINVAL;
356 } 369 }
357 370
358 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 371 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
372 n->nlmsg_flags & NLM_F_ECHO);
359} 373}
360 374
361struct tcf_dump_args 375struct tcf_dump_args {
362{
363 struct tcf_walker w; 376 struct tcf_walker w;
364 struct sk_buff *skb; 377 struct sk_buff *skb;
365 struct netlink_callback *cb; 378 struct netlink_callback *cb;
366}; 379};
367 380
368static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg) 381static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
382 struct tcf_walker *arg)
369{ 383{
370 struct tcf_dump_args *a = (void*)arg; 384 struct tcf_dump_args *a = (void *)arg;
371 385
372 return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, 386 return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
373 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); 387 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
@@ -375,16 +389,20 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walke
375 389
376static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) 390static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
377{ 391{
392 struct net *net = skb->sk->sk_net;
378 int t; 393 int t;
379 int s_t; 394 int s_t;
380 struct net_device *dev; 395 struct net_device *dev;
381 struct Qdisc *q; 396 struct Qdisc *q;
382 struct tcf_proto *tp, **chain; 397 struct tcf_proto *tp, **chain;
383 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 398 struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
384 unsigned long cl = 0; 399 unsigned long cl = 0;
385 struct Qdisc_class_ops *cops; 400 const struct Qdisc_class_ops *cops;
386 struct tcf_dump_args arg; 401 struct tcf_dump_args arg;
387 402
403 if (net != &init_net)
404 return 0;
405
388 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 406 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
389 return skb->len; 407 return skb->len;
390 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 408 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
@@ -421,9 +439,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
421 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 439 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
422 if (cb->args[1] == 0) { 440 if (cb->args[1] == 0) {
423 if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, 441 if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
424 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) { 442 cb->nlh->nlmsg_seq, NLM_F_MULTI,
443 RTM_NEWTFILTER) <= 0)
425 break; 444 break;
426 } 445
427 cb->args[1] = 1; 446 cb->args[1] = 1;
428 } 447 }
429 if (tp->ops->walk == NULL) 448 if (tp->ops->walk == NULL)
@@ -450,8 +469,7 @@ out:
450 return skb->len; 469 return skb->len;
451} 470}
452 471
453void 472void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
454tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
455{ 473{
456#ifdef CONFIG_NET_CLS_ACT 474#ifdef CONFIG_NET_CLS_ACT
457 if (exts->action) { 475 if (exts->action) {
@@ -460,49 +478,48 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
460 } 478 }
461#endif 479#endif
462} 480}
481EXPORT_SYMBOL(tcf_exts_destroy);
463 482
464 483int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
465int 484 struct nlattr *rate_tlv, struct tcf_exts *exts,
466tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb, 485 const struct tcf_ext_map *map)
467 struct rtattr *rate_tlv, struct tcf_exts *exts,
468 struct tcf_ext_map *map)
469{ 486{
470 memset(exts, 0, sizeof(*exts)); 487 memset(exts, 0, sizeof(*exts));
471 488
472#ifdef CONFIG_NET_CLS_ACT 489#ifdef CONFIG_NET_CLS_ACT
473 { 490 {
474 int err;
475 struct tc_action *act; 491 struct tc_action *act;
476 492
477 if (map->police && tb[map->police-1]) { 493 if (map->police && tb[map->police]) {
478 act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police", 494 act = tcf_action_init_1(tb[map->police], rate_tlv,
479 TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); 495 "police", TCA_ACT_NOREPLACE,
480 if (act == NULL) 496 TCA_ACT_BIND);
481 return err; 497 if (IS_ERR(act))
498 return PTR_ERR(act);
482 499
483 act->type = TCA_OLD_COMPAT; 500 act->type = TCA_OLD_COMPAT;
484 exts->action = act; 501 exts->action = act;
485 } else if (map->action && tb[map->action-1]) { 502 } else if (map->action && tb[map->action]) {
486 act = tcf_action_init(tb[map->action-1], rate_tlv, NULL, 503 act = tcf_action_init(tb[map->action], rate_tlv, NULL,
487 TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); 504 TCA_ACT_NOREPLACE, TCA_ACT_BIND);
488 if (act == NULL) 505 if (IS_ERR(act))
489 return err; 506 return PTR_ERR(act);
490 507
491 exts->action = act; 508 exts->action = act;
492 } 509 }
493 } 510 }
494#else 511#else
495 if ((map->action && tb[map->action-1]) || 512 if ((map->action && tb[map->action]) ||
496 (map->police && tb[map->police-1])) 513 (map->police && tb[map->police]))
497 return -EOPNOTSUPP; 514 return -EOPNOTSUPP;
498#endif 515#endif
499 516
500 return 0; 517 return 0;
501} 518}
519EXPORT_SYMBOL(tcf_exts_validate);
502 520
503void 521void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
504tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, 522 struct tcf_exts *src)
505 struct tcf_exts *src)
506{ 523{
507#ifdef CONFIG_NET_CLS_ACT 524#ifdef CONFIG_NET_CLS_ACT
508 if (src->action) { 525 if (src->action) {
@@ -515,10 +532,10 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
515 } 532 }
516#endif 533#endif
517} 534}
535EXPORT_SYMBOL(tcf_exts_change);
518 536
519int 537int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
520tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, 538 const struct tcf_ext_map *map)
521 struct tcf_ext_map *map)
522{ 539{
523#ifdef CONFIG_NET_CLS_ACT 540#ifdef CONFIG_NET_CLS_ACT
524 if (map->action && exts->action) { 541 if (map->action && exts->action) {
@@ -527,39 +544,45 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
527 * to work with both old and new modes of entering 544 * to work with both old and new modes of entering
528 * tc data even if iproute2 was newer - jhs 545 * tc data even if iproute2 was newer - jhs
529 */ 546 */
530 struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); 547 struct nlattr *nest;
531 548
532 if (exts->action->type != TCA_OLD_COMPAT) { 549 if (exts->action->type != TCA_OLD_COMPAT) {
533 RTA_PUT(skb, map->action, 0, NULL); 550 nest = nla_nest_start(skb, map->action);
551 if (nest == NULL)
552 goto nla_put_failure;
534 if (tcf_action_dump(skb, exts->action, 0, 0) < 0) 553 if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
535 goto rtattr_failure; 554 goto nla_put_failure;
536 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; 555 nla_nest_end(skb, nest);
537 } else if (map->police) { 556 } else if (map->police) {
538 RTA_PUT(skb, map->police, 0, NULL); 557 nest = nla_nest_start(skb, map->police);
558 if (nest == NULL)
559 goto nla_put_failure;
539 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) 560 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
540 goto rtattr_failure; 561 goto nla_put_failure;
541 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; 562 nla_nest_end(skb, nest);
542 } 563 }
543 } 564 }
544#endif 565#endif
545 return 0; 566 return 0;
546rtattr_failure: __attribute__ ((unused)) 567nla_put_failure: __attribute__ ((unused))
547 return -1; 568 return -1;
548} 569}
570EXPORT_SYMBOL(tcf_exts_dump);
549 571
550int 572
551tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, 573int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
552 struct tcf_ext_map *map) 574 const struct tcf_ext_map *map)
553{ 575{
554#ifdef CONFIG_NET_CLS_ACT 576#ifdef CONFIG_NET_CLS_ACT
555 if (exts->action) 577 if (exts->action)
556 if (tcf_action_copy_stats(skb, exts->action, 1) < 0) 578 if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
557 goto rtattr_failure; 579 goto nla_put_failure;
558#endif 580#endif
559 return 0; 581 return 0;
560rtattr_failure: __attribute__ ((unused)) 582nla_put_failure: __attribute__ ((unused))
561 return -1; 583 return -1;
562} 584}
585EXPORT_SYMBOL(tcf_exts_dump_stats);
563 586
564static int __init tc_filter_init(void) 587static int __init tc_filter_init(void)
565{ 588{
@@ -572,11 +595,3 @@ static int __init tc_filter_init(void)
572} 595}
573 596
574subsys_initcall(tc_filter_init); 597subsys_initcall(tc_filter_init);
575
576EXPORT_SYMBOL(register_tcf_proto_ops);
577EXPORT_SYMBOL(unregister_tcf_proto_ops);
578EXPORT_SYMBOL(tcf_exts_validate);
579EXPORT_SYMBOL(tcf_exts_destroy);
580EXPORT_SYMBOL(tcf_exts_change);
581EXPORT_SYMBOL(tcf_exts_dump);
582EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 8dbcf2771a46..956915c217d6 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -35,7 +35,7 @@ struct basic_filter
35 struct list_head link; 35 struct list_head link;
36}; 36};
37 37
38static struct tcf_ext_map basic_ext_map = { 38static const struct tcf_ext_map basic_ext_map = {
39 .action = TCA_BASIC_ACT, 39 .action = TCA_BASIC_ACT,
40 .police = TCA_BASIC_POLICE 40 .police = TCA_BASIC_POLICE
41}; 41};
@@ -129,28 +129,29 @@ static int basic_delete(struct tcf_proto *tp, unsigned long arg)
129 return -ENOENT; 129 return -ENOENT;
130} 130}
131 131
132static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
133 [TCA_BASIC_CLASSID] = { .type = NLA_U32 },
134 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
135};
136
132static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 137static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
133 unsigned long base, struct rtattr **tb, 138 unsigned long base, struct nlattr **tb,
134 struct rtattr *est) 139 struct nlattr *est)
135{ 140{
136 int err = -EINVAL; 141 int err = -EINVAL;
137 struct tcf_exts e; 142 struct tcf_exts e;
138 struct tcf_ematch_tree t; 143 struct tcf_ematch_tree t;
139 144
140 if (tb[TCA_BASIC_CLASSID-1])
141 if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
142 return err;
143
144 err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map); 145 err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
145 if (err < 0) 146 if (err < 0)
146 return err; 147 return err;
147 148
148 err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t); 149 err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
149 if (err < 0) 150 if (err < 0)
150 goto errout; 151 goto errout;
151 152
152 if (tb[TCA_BASIC_CLASSID-1]) { 153 if (tb[TCA_BASIC_CLASSID]) {
153 f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]); 154 f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
154 tcf_bind_filter(tp, &f->res, base); 155 tcf_bind_filter(tp, &f->res, base);
155 } 156 }
156 157
@@ -164,23 +165,25 @@ errout:
164} 165}
165 166
166static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, 167static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
167 struct rtattr **tca, unsigned long *arg) 168 struct nlattr **tca, unsigned long *arg)
168{ 169{
169 int err = -EINVAL; 170 int err;
170 struct basic_head *head = (struct basic_head *) tp->root; 171 struct basic_head *head = (struct basic_head *) tp->root;
171 struct rtattr *tb[TCA_BASIC_MAX]; 172 struct nlattr *tb[TCA_BASIC_MAX + 1];
172 struct basic_filter *f = (struct basic_filter *) *arg; 173 struct basic_filter *f = (struct basic_filter *) *arg;
173 174
174 if (tca[TCA_OPTIONS-1] == NULL) 175 if (tca[TCA_OPTIONS] == NULL)
175 return -EINVAL; 176 return -EINVAL;
176 177
177 if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0) 178 err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
178 return -EINVAL; 179 basic_policy);
180 if (err < 0)
181 return err;
179 182
180 if (f != NULL) { 183 if (f != NULL) {
181 if (handle && f->handle != handle) 184 if (handle && f->handle != handle)
182 return -EINVAL; 185 return -EINVAL;
183 return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]); 186 return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
184 } 187 }
185 188
186 err = -ENOBUFS; 189 err = -ENOBUFS;
@@ -206,7 +209,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
206 f->handle = head->hgenerator; 209 f->handle = head->hgenerator;
207 } 210 }
208 211
209 err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]); 212 err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
210 if (err < 0) 213 if (err < 0)
211 goto errout; 214 goto errout;
212 215
@@ -245,33 +248,33 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
245 struct sk_buff *skb, struct tcmsg *t) 248 struct sk_buff *skb, struct tcmsg *t)
246{ 249{
247 struct basic_filter *f = (struct basic_filter *) fh; 250 struct basic_filter *f = (struct basic_filter *) fh;
248 unsigned char *b = skb_tail_pointer(skb); 251 struct nlattr *nest;
249 struct rtattr *rta;
250 252
251 if (f == NULL) 253 if (f == NULL)
252 return skb->len; 254 return skb->len;
253 255
254 t->tcm_handle = f->handle; 256 t->tcm_handle = f->handle;
255 257
256 rta = (struct rtattr *) b; 258 nest = nla_nest_start(skb, TCA_OPTIONS);
257 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 259 if (nest == NULL)
260 goto nla_put_failure;
258 261
259 if (f->res.classid) 262 if (f->res.classid)
260 RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid); 263 NLA_PUT_U32(skb, TCA_BASIC_CLASSID, f->res.classid);
261 264
262 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || 265 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
263 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 266 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
264 goto rtattr_failure; 267 goto nla_put_failure;
265 268
266 rta->rta_len = skb_tail_pointer(skb) - b; 269 nla_nest_end(skb, nest);
267 return skb->len; 270 return skb->len;
268 271
269rtattr_failure: 272nla_put_failure:
270 nlmsg_trim(skb, b); 273 nla_nest_cancel(skb, nest);
271 return -1; 274 return -1;
272} 275}
273 276
274static struct tcf_proto_ops cls_basic_ops = { 277static struct tcf_proto_ops cls_basic_ops __read_mostly = {
275 .kind = "basic", 278 .kind = "basic",
276 .classify = basic_classify, 279 .classify = basic_classify,
277 .init = basic_init, 280 .init = basic_init,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
new file mode 100644
index 000000000000..5a7f6a3060fc
--- /dev/null
+++ b/net/sched/cls_flow.c
@@ -0,0 +1,660 @@
1/*
2 * net/sched/cls_flow.c Generic flow classifier
3 *
4 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/list.h>
15#include <linux/jhash.h>
16#include <linux/random.h>
17#include <linux/pkt_cls.h>
18#include <linux/skbuff.h>
19#include <linux/in.h>
20#include <linux/ip.h>
21#include <linux/ipv6.h>
22
23#include <net/pkt_cls.h>
24#include <net/ip.h>
25#include <net/route.h>
26#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
27#include <net/netfilter/nf_conntrack.h>
28#endif
29
30struct flow_head {
31 struct list_head filters;
32};
33
34struct flow_filter {
35 struct list_head list;
36 struct tcf_exts exts;
37 struct tcf_ematch_tree ematches;
38 u32 handle;
39
40 u32 nkeys;
41 u32 keymask;
42 u32 mode;
43 u32 mask;
44 u32 xor;
45 u32 rshift;
46 u32 addend;
47 u32 divisor;
48 u32 baseclass;
49};
50
51static u32 flow_hashrnd __read_mostly;
52static int flow_hashrnd_initted __read_mostly;
53
54static const struct tcf_ext_map flow_ext_map = {
55 .action = TCA_FLOW_ACT,
56 .police = TCA_FLOW_POLICE,
57};
58
59static inline u32 addr_fold(void *addr)
60{
61 unsigned long a = (unsigned long)addr;
62
63 return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
64}
65
66static u32 flow_get_src(const struct sk_buff *skb)
67{
68 switch (skb->protocol) {
69 case __constant_htons(ETH_P_IP):
70 return ntohl(ip_hdr(skb)->saddr);
71 case __constant_htons(ETH_P_IPV6):
72 return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
73 default:
74 return addr_fold(skb->sk);
75 }
76}
77
78static u32 flow_get_dst(const struct sk_buff *skb)
79{
80 switch (skb->protocol) {
81 case __constant_htons(ETH_P_IP):
82 return ntohl(ip_hdr(skb)->daddr);
83 case __constant_htons(ETH_P_IPV6):
84 return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
85 default:
86 return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
87 }
88}
89
90static u32 flow_get_proto(const struct sk_buff *skb)
91{
92 switch (skb->protocol) {
93 case __constant_htons(ETH_P_IP):
94 return ip_hdr(skb)->protocol;
95 case __constant_htons(ETH_P_IPV6):
96 return ipv6_hdr(skb)->nexthdr;
97 default:
98 return 0;
99 }
100}
101
102static int has_ports(u8 protocol)
103{
104 switch (protocol) {
105 case IPPROTO_TCP:
106 case IPPROTO_UDP:
107 case IPPROTO_UDPLITE:
108 case IPPROTO_SCTP:
109 case IPPROTO_DCCP:
110 case IPPROTO_ESP:
111 return 1;
112 default:
113 return 0;
114 }
115}
116
117static u32 flow_get_proto_src(const struct sk_buff *skb)
118{
119 u32 res = 0;
120
121 switch (skb->protocol) {
122 case __constant_htons(ETH_P_IP): {
123 struct iphdr *iph = ip_hdr(skb);
124
125 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
126 has_ports(iph->protocol))
127 res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
128 break;
129 }
130 case __constant_htons(ETH_P_IPV6): {
131 struct ipv6hdr *iph = ipv6_hdr(skb);
132
133 if (has_ports(iph->nexthdr))
134 res = ntohs(*(__be16 *)&iph[1]);
135 break;
136 }
137 default:
138 res = addr_fold(skb->sk);
139 }
140
141 return res;
142}
143
144static u32 flow_get_proto_dst(const struct sk_buff *skb)
145{
146 u32 res = 0;
147
148 switch (skb->protocol) {
149 case __constant_htons(ETH_P_IP): {
150 struct iphdr *iph = ip_hdr(skb);
151
152 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
153 has_ports(iph->protocol))
154 res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
155 break;
156 }
157 case __constant_htons(ETH_P_IPV6): {
158 struct ipv6hdr *iph = ipv6_hdr(skb);
159
160 if (has_ports(iph->nexthdr))
161 res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
162 break;
163 }
164 default:
165 res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
166 }
167
168 return res;
169}
170
171static u32 flow_get_iif(const struct sk_buff *skb)
172{
173 return skb->iif;
174}
175
176static u32 flow_get_priority(const struct sk_buff *skb)
177{
178 return skb->priority;
179}
180
181static u32 flow_get_mark(const struct sk_buff *skb)
182{
183 return skb->mark;
184}
185
186static u32 flow_get_nfct(const struct sk_buff *skb)
187{
188#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
189 return addr_fold(skb->nfct);
190#else
191 return 0;
192#endif
193}
194
195#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
196#define CTTUPLE(skb, member) \
197({ \
198 enum ip_conntrack_info ctinfo; \
199 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \
200 if (ct == NULL) \
201 goto fallback; \
202 ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \
203})
204#else
205#define CTTUPLE(skb, member) \
206({ \
207 goto fallback; \
208 0; \
209})
210#endif
211
212static u32 flow_get_nfct_src(const struct sk_buff *skb)
213{
214 switch (skb->protocol) {
215 case __constant_htons(ETH_P_IP):
216 return ntohl(CTTUPLE(skb, src.u3.ip));
217 case __constant_htons(ETH_P_IPV6):
218 return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
219 }
220fallback:
221 return flow_get_src(skb);
222}
223
224static u32 flow_get_nfct_dst(const struct sk_buff *skb)
225{
226 switch (skb->protocol) {
227 case __constant_htons(ETH_P_IP):
228 return ntohl(CTTUPLE(skb, dst.u3.ip));
229 case __constant_htons(ETH_P_IPV6):
230 return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
231 }
232fallback:
233 return flow_get_dst(skb);
234}
235
236static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
237{
238 return ntohs(CTTUPLE(skb, src.u.all));
239fallback:
240 return flow_get_proto_src(skb);
241}
242
243static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
244{
245 return ntohs(CTTUPLE(skb, dst.u.all));
246fallback:
247 return flow_get_proto_dst(skb);
248}
249
250static u32 flow_get_rtclassid(const struct sk_buff *skb)
251{
252#ifdef CONFIG_NET_CLS_ROUTE
253 if (skb->dst)
254 return skb->dst->tclassid;
255#endif
256 return 0;
257}
258
259static u32 flow_get_skuid(const struct sk_buff *skb)
260{
261 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
262 return skb->sk->sk_socket->file->f_uid;
263 return 0;
264}
265
266static u32 flow_get_skgid(const struct sk_buff *skb)
267{
268 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
269 return skb->sk->sk_socket->file->f_gid;
270 return 0;
271}
272
273static u32 flow_key_get(const struct sk_buff *skb, int key)
274{
275 switch (key) {
276 case FLOW_KEY_SRC:
277 return flow_get_src(skb);
278 case FLOW_KEY_DST:
279 return flow_get_dst(skb);
280 case FLOW_KEY_PROTO:
281 return flow_get_proto(skb);
282 case FLOW_KEY_PROTO_SRC:
283 return flow_get_proto_src(skb);
284 case FLOW_KEY_PROTO_DST:
285 return flow_get_proto_dst(skb);
286 case FLOW_KEY_IIF:
287 return flow_get_iif(skb);
288 case FLOW_KEY_PRIORITY:
289 return flow_get_priority(skb);
290 case FLOW_KEY_MARK:
291 return flow_get_mark(skb);
292 case FLOW_KEY_NFCT:
293 return flow_get_nfct(skb);
294 case FLOW_KEY_NFCT_SRC:
295 return flow_get_nfct_src(skb);
296 case FLOW_KEY_NFCT_DST:
297 return flow_get_nfct_dst(skb);
298 case FLOW_KEY_NFCT_PROTO_SRC:
299 return flow_get_nfct_proto_src(skb);
300 case FLOW_KEY_NFCT_PROTO_DST:
301 return flow_get_nfct_proto_dst(skb);
302 case FLOW_KEY_RTCLASSID:
303 return flow_get_rtclassid(skb);
304 case FLOW_KEY_SKUID:
305 return flow_get_skuid(skb);
306 case FLOW_KEY_SKGID:
307 return flow_get_skgid(skb);
308 default:
309 WARN_ON(1);
310 return 0;
311 }
312}
313
314static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
315 struct tcf_result *res)
316{
317 struct flow_head *head = tp->root;
318 struct flow_filter *f;
319 u32 keymask;
320 u32 classid;
321 unsigned int n, key;
322 int r;
323
324 list_for_each_entry(f, &head->filters, list) {
325 u32 keys[f->nkeys];
326
327 if (!tcf_em_tree_match(skb, &f->ematches, NULL))
328 continue;
329
330 keymask = f->keymask;
331
332 for (n = 0; n < f->nkeys; n++) {
333 key = ffs(keymask) - 1;
334 keymask &= ~(1 << key);
335 keys[n] = flow_key_get(skb, key);
336 }
337
338 if (f->mode == FLOW_MODE_HASH)
339 classid = jhash2(keys, f->nkeys, flow_hashrnd);
340 else {
341 classid = keys[0];
342 classid = (classid & f->mask) ^ f->xor;
343 classid = (classid >> f->rshift) + f->addend;
344 }
345
346 if (f->divisor)
347 classid %= f->divisor;
348
349 res->class = 0;
350 res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
351
352 r = tcf_exts_exec(skb, &f->exts, res);
353 if (r < 0)
354 continue;
355 return r;
356 }
357 return -1;
358}
359
360static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
361 [TCA_FLOW_KEYS] = { .type = NLA_U32 },
362 [TCA_FLOW_MODE] = { .type = NLA_U32 },
363 [TCA_FLOW_BASECLASS] = { .type = NLA_U32 },
364 [TCA_FLOW_RSHIFT] = { .type = NLA_U32 },
365 [TCA_FLOW_ADDEND] = { .type = NLA_U32 },
366 [TCA_FLOW_MASK] = { .type = NLA_U32 },
367 [TCA_FLOW_XOR] = { .type = NLA_U32 },
368 [TCA_FLOW_DIVISOR] = { .type = NLA_U32 },
369 [TCA_FLOW_ACT] = { .type = NLA_NESTED },
370 [TCA_FLOW_POLICE] = { .type = NLA_NESTED },
371 [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED },
372};
373
374static int flow_change(struct tcf_proto *tp, unsigned long base,
375 u32 handle, struct nlattr **tca,
376 unsigned long *arg)
377{
378 struct flow_head *head = tp->root;
379 struct flow_filter *f;
380 struct nlattr *opt = tca[TCA_OPTIONS];
381 struct nlattr *tb[TCA_FLOW_MAX + 1];
382 struct tcf_exts e;
383 struct tcf_ematch_tree t;
384 unsigned int nkeys = 0;
385 u32 baseclass = 0;
386 u32 keymask = 0;
387 u32 mode;
388 int err;
389
390 if (opt == NULL)
391 return -EINVAL;
392
393 err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
394 if (err < 0)
395 return err;
396
397 if (tb[TCA_FLOW_BASECLASS]) {
398 baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
399 if (TC_H_MIN(baseclass) == 0)
400 return -EINVAL;
401 }
402
403 if (tb[TCA_FLOW_KEYS]) {
404 keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
405 if (fls(keymask) - 1 > FLOW_KEY_MAX)
406 return -EOPNOTSUPP;
407
408 nkeys = hweight32(keymask);
409 if (nkeys == 0)
410 return -EINVAL;
411 }
412
413 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
414 if (err < 0)
415 return err;
416
417 err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
418 if (err < 0)
419 goto err1;
420
421 f = (struct flow_filter *)*arg;
422 if (f != NULL) {
423 err = -EINVAL;
424 if (f->handle != handle && handle)
425 goto err2;
426
427 mode = f->mode;
428 if (tb[TCA_FLOW_MODE])
429 mode = nla_get_u32(tb[TCA_FLOW_MODE]);
430 if (mode != FLOW_MODE_HASH && nkeys > 1)
431 goto err2;
432 } else {
433 err = -EINVAL;
434 if (!handle)
435 goto err2;
436 if (!tb[TCA_FLOW_KEYS])
437 goto err2;
438
439 mode = FLOW_MODE_MAP;
440 if (tb[TCA_FLOW_MODE])
441 mode = nla_get_u32(tb[TCA_FLOW_MODE]);
442 if (mode != FLOW_MODE_HASH && nkeys > 1)
443 goto err2;
444
445 if (TC_H_MAJ(baseclass) == 0)
446 baseclass = TC_H_MAKE(tp->q->handle, baseclass);
447 if (TC_H_MIN(baseclass) == 0)
448 baseclass = TC_H_MAKE(baseclass, 1);
449
450 err = -ENOBUFS;
451 f = kzalloc(sizeof(*f), GFP_KERNEL);
452 if (f == NULL)
453 goto err2;
454
455 f->handle = handle;
456 f->mask = ~0U;
457 }
458
459 tcf_exts_change(tp, &f->exts, &e);
460 tcf_em_tree_change(tp, &f->ematches, &t);
461
462 tcf_tree_lock(tp);
463
464 if (tb[TCA_FLOW_KEYS]) {
465 f->keymask = keymask;
466 f->nkeys = nkeys;
467 }
468
469 f->mode = mode;
470
471 if (tb[TCA_FLOW_MASK])
472 f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
473 if (tb[TCA_FLOW_XOR])
474 f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
475 if (tb[TCA_FLOW_RSHIFT])
476 f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
477 if (tb[TCA_FLOW_ADDEND])
478 f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
479
480 if (tb[TCA_FLOW_DIVISOR])
481 f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
482 if (baseclass)
483 f->baseclass = baseclass;
484
485 if (*arg == 0)
486 list_add_tail(&f->list, &head->filters);
487
488 tcf_tree_unlock(tp);
489
490 *arg = (unsigned long)f;
491 return 0;
492
493err2:
494 tcf_em_tree_destroy(tp, &t);
495err1:
496 tcf_exts_destroy(tp, &e);
497 return err;
498}
499
500static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
501{
502 tcf_exts_destroy(tp, &f->exts);
503 tcf_em_tree_destroy(tp, &f->ematches);
504 kfree(f);
505}
506
507static int flow_delete(struct tcf_proto *tp, unsigned long arg)
508{
509 struct flow_filter *f = (struct flow_filter *)arg;
510
511 tcf_tree_lock(tp);
512 list_del(&f->list);
513 tcf_tree_unlock(tp);
514 flow_destroy_filter(tp, f);
515 return 0;
516}
517
518static int flow_init(struct tcf_proto *tp)
519{
520 struct flow_head *head;
521
522 if (!flow_hashrnd_initted) {
523 get_random_bytes(&flow_hashrnd, 4);
524 flow_hashrnd_initted = 1;
525 }
526
527 head = kzalloc(sizeof(*head), GFP_KERNEL);
528 if (head == NULL)
529 return -ENOBUFS;
530 INIT_LIST_HEAD(&head->filters);
531 tp->root = head;
532 return 0;
533}
534
535static void flow_destroy(struct tcf_proto *tp)
536{
537 struct flow_head *head = tp->root;
538 struct flow_filter *f, *next;
539
540 list_for_each_entry_safe(f, next, &head->filters, list) {
541 list_del(&f->list);
542 flow_destroy_filter(tp, f);
543 }
544 kfree(head);
545}
546
547static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
548{
549 struct flow_head *head = tp->root;
550 struct flow_filter *f;
551
552 list_for_each_entry(f, &head->filters, list)
553 if (f->handle == handle)
554 return (unsigned long)f;
555 return 0;
556}
557
558static void flow_put(struct tcf_proto *tp, unsigned long f)
559{
560 return;
561}
562
563static int flow_dump(struct tcf_proto *tp, unsigned long fh,
564 struct sk_buff *skb, struct tcmsg *t)
565{
566 struct flow_filter *f = (struct flow_filter *)fh;
567 struct nlattr *nest;
568
569 if (f == NULL)
570 return skb->len;
571
572 t->tcm_handle = f->handle;
573
574 nest = nla_nest_start(skb, TCA_OPTIONS);
575 if (nest == NULL)
576 goto nla_put_failure;
577
578 NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
579 NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);
580
581 if (f->mask != ~0 || f->xor != 0) {
582 NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
583 NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
584 }
585 if (f->rshift)
586 NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
587 if (f->addend)
588 NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);
589
590 if (f->divisor)
591 NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
592 if (f->baseclass)
593 NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
594
595 if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
596 goto nla_put_failure;
597
598 if (f->ematches.hdr.nmatches &&
599 tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
600 goto nla_put_failure;
601
602 nla_nest_end(skb, nest);
603
604 if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
605 goto nla_put_failure;
606
607 return skb->len;
608
609nla_put_failure:
610 nlmsg_trim(skb, nest);
611 return -1;
612}
613
614static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
615{
616 struct flow_head *head = tp->root;
617 struct flow_filter *f;
618
619 list_for_each_entry(f, &head->filters, list) {
620 if (arg->count < arg->skip)
621 goto skip;
622 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
623 arg->stop = 1;
624 break;
625 }
626skip:
627 arg->count++;
628 }
629}
630
631static struct tcf_proto_ops cls_flow_ops __read_mostly = {
632 .kind = "flow",
633 .classify = flow_classify,
634 .init = flow_init,
635 .destroy = flow_destroy,
636 .change = flow_change,
637 .delete = flow_delete,
638 .get = flow_get,
639 .put = flow_put,
640 .dump = flow_dump,
641 .walk = flow_walk,
642 .owner = THIS_MODULE,
643};
644
645static int __init cls_flow_init(void)
646{
647 return register_tcf_proto_ops(&cls_flow_ops);
648}
649
650static void __exit cls_flow_exit(void)
651{
652 unregister_tcf_proto_ops(&cls_flow_ops);
653}
654
655module_init(cls_flow_init);
656module_exit(cls_flow_exit);
657
658MODULE_LICENSE("GPL");
659MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
660MODULE_DESCRIPTION("TC flow classifier");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8adbd6a37d14..b0f90e593af0 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -47,7 +47,7 @@ struct fw_filter
47 struct tcf_exts exts; 47 struct tcf_exts exts;
48}; 48};
49 49
50static struct tcf_ext_map fw_ext_map = { 50static const struct tcf_ext_map fw_ext_map = {
51 .action = TCA_FW_ACT, 51 .action = TCA_FW_ACT,
52 .police = TCA_FW_POLICE 52 .police = TCA_FW_POLICE
53}; 53};
@@ -186,39 +186,41 @@ out:
186 return -EINVAL; 186 return -EINVAL;
187} 187}
188 188
189static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
190 [TCA_FW_CLASSID] = { .type = NLA_U32 },
191 [TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
192 [TCA_FW_MASK] = { .type = NLA_U32 },
193};
194
189static int 195static int
190fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, 196fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
191 struct rtattr **tb, struct rtattr **tca, unsigned long base) 197 struct nlattr **tb, struct nlattr **tca, unsigned long base)
192{ 198{
193 struct fw_head *head = (struct fw_head *)tp->root; 199 struct fw_head *head = (struct fw_head *)tp->root;
194 struct tcf_exts e; 200 struct tcf_exts e;
195 u32 mask; 201 u32 mask;
196 int err; 202 int err;
197 203
198 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); 204 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
199 if (err < 0) 205 if (err < 0)
200 return err; 206 return err;
201 207
202 err = -EINVAL; 208 err = -EINVAL;
203 if (tb[TCA_FW_CLASSID-1]) { 209 if (tb[TCA_FW_CLASSID]) {
204 if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != sizeof(u32)) 210 f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
205 goto errout;
206 f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
207 tcf_bind_filter(tp, &f->res, base); 211 tcf_bind_filter(tp, &f->res, base);
208 } 212 }
209 213
210#ifdef CONFIG_NET_CLS_IND 214#ifdef CONFIG_NET_CLS_IND
211 if (tb[TCA_FW_INDEV-1]) { 215 if (tb[TCA_FW_INDEV]) {
212 err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV-1]); 216 err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]);
213 if (err < 0) 217 if (err < 0)
214 goto errout; 218 goto errout;
215 } 219 }
216#endif /* CONFIG_NET_CLS_IND */ 220#endif /* CONFIG_NET_CLS_IND */
217 221
218 if (tb[TCA_FW_MASK-1]) { 222 if (tb[TCA_FW_MASK]) {
219 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) 223 mask = nla_get_u32(tb[TCA_FW_MASK]);
220 goto errout;
221 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
222 if (mask != head->mask) 224 if (mask != head->mask)
223 goto errout; 225 goto errout;
224 } else if (head->mask != 0xFFFFFFFF) 226 } else if (head->mask != 0xFFFFFFFF)
@@ -234,20 +236,21 @@ errout:
234 236
235static int fw_change(struct tcf_proto *tp, unsigned long base, 237static int fw_change(struct tcf_proto *tp, unsigned long base,
236 u32 handle, 238 u32 handle,
237 struct rtattr **tca, 239 struct nlattr **tca,
238 unsigned long *arg) 240 unsigned long *arg)
239{ 241{
240 struct fw_head *head = (struct fw_head*)tp->root; 242 struct fw_head *head = (struct fw_head*)tp->root;
241 struct fw_filter *f = (struct fw_filter *) *arg; 243 struct fw_filter *f = (struct fw_filter *) *arg;
242 struct rtattr *opt = tca[TCA_OPTIONS-1]; 244 struct nlattr *opt = tca[TCA_OPTIONS];
243 struct rtattr *tb[TCA_FW_MAX]; 245 struct nlattr *tb[TCA_FW_MAX + 1];
244 int err; 246 int err;
245 247
246 if (!opt) 248 if (!opt)
247 return handle ? -EINVAL : 0; 249 return handle ? -EINVAL : 0;
248 250
249 if (rtattr_parse_nested(tb, TCA_FW_MAX, opt) < 0) 251 err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
250 return -EINVAL; 252 if (err < 0)
253 return err;
251 254
252 if (f != NULL) { 255 if (f != NULL) {
253 if (f->id != handle && handle) 256 if (f->id != handle && handle)
@@ -260,11 +263,8 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
260 263
261 if (head == NULL) { 264 if (head == NULL) {
262 u32 mask = 0xFFFFFFFF; 265 u32 mask = 0xFFFFFFFF;
263 if (tb[TCA_FW_MASK-1]) { 266 if (tb[TCA_FW_MASK])
264 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) 267 mask = nla_get_u32(tb[TCA_FW_MASK]);
265 return -EINVAL;
266 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
267 }
268 268
269 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); 269 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
270 if (head == NULL) 270 if (head == NULL)
@@ -333,7 +333,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
333 struct fw_head *head = (struct fw_head *)tp->root; 333 struct fw_head *head = (struct fw_head *)tp->root;
334 struct fw_filter *f = (struct fw_filter*)fh; 334 struct fw_filter *f = (struct fw_filter*)fh;
335 unsigned char *b = skb_tail_pointer(skb); 335 unsigned char *b = skb_tail_pointer(skb);
336 struct rtattr *rta; 336 struct nlattr *nest;
337 337
338 if (f == NULL) 338 if (f == NULL)
339 return skb->len; 339 return skb->len;
@@ -343,35 +343,35 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
343 if (!f->res.classid && !tcf_exts_is_available(&f->exts)) 343 if (!f->res.classid && !tcf_exts_is_available(&f->exts))
344 return skb->len; 344 return skb->len;
345 345
346 rta = (struct rtattr*)b; 346 nest = nla_nest_start(skb, TCA_OPTIONS);
347 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 347 if (nest == NULL)
348 goto nla_put_failure;
348 349
349 if (f->res.classid) 350 if (f->res.classid)
350 RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid); 351 NLA_PUT_U32(skb, TCA_FW_CLASSID, f->res.classid);
351#ifdef CONFIG_NET_CLS_IND 352#ifdef CONFIG_NET_CLS_IND
352 if (strlen(f->indev)) 353 if (strlen(f->indev))
353 RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); 354 NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
354#endif /* CONFIG_NET_CLS_IND */ 355#endif /* CONFIG_NET_CLS_IND */
355 if (head->mask != 0xFFFFFFFF) 356 if (head->mask != 0xFFFFFFFF)
356 RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); 357 NLA_PUT_U32(skb, TCA_FW_MASK, head->mask);
357 358
358 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) 359 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
359 goto rtattr_failure; 360 goto nla_put_failure;
360 361
361 rta->rta_len = skb_tail_pointer(skb) - b; 362 nla_nest_end(skb, nest);
362 363
363 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) 364 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
364 goto rtattr_failure; 365 goto nla_put_failure;
365 366
366 return skb->len; 367 return skb->len;
367 368
368rtattr_failure: 369nla_put_failure:
369 nlmsg_trim(skb, b); 370 nlmsg_trim(skb, b);
370 return -1; 371 return -1;
371} 372}
372 373
373static struct tcf_proto_ops cls_fw_ops = { 374static struct tcf_proto_ops cls_fw_ops __read_mostly = {
374 .next = NULL,
375 .kind = "fw", 375 .kind = "fw",
376 .classify = fw_classify, 376 .classify = fw_classify,
377 .init = fw_init, 377 .init = fw_init,
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0a8409c1d28a..784dcb870b98 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -62,7 +62,7 @@ struct route4_filter
62 62
63#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) 63#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
64 64
65static struct tcf_ext_map route_ext_map = { 65static const struct tcf_ext_map route_ext_map = {
66 .police = TCA_ROUTE4_POLICE, 66 .police = TCA_ROUTE4_POLICE,
67 .action = TCA_ROUTE4_ACT 67 .action = TCA_ROUTE4_ACT
68}; 68};
@@ -323,9 +323,16 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
323 return 0; 323 return 0;
324} 324}
325 325
326static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
327 [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 },
328 [TCA_ROUTE4_TO] = { .type = NLA_U32 },
329 [TCA_ROUTE4_FROM] = { .type = NLA_U32 },
330 [TCA_ROUTE4_IIF] = { .type = NLA_U32 },
331};
332
326static int route4_set_parms(struct tcf_proto *tp, unsigned long base, 333static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
327 struct route4_filter *f, u32 handle, struct route4_head *head, 334 struct route4_filter *f, u32 handle, struct route4_head *head,
328 struct rtattr **tb, struct rtattr *est, int new) 335 struct nlattr **tb, struct nlattr *est, int new)
329{ 336{
330 int err; 337 int err;
331 u32 id = 0, to = 0, nhandle = 0x8000; 338 u32 id = 0, to = 0, nhandle = 0x8000;
@@ -339,34 +346,24 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
339 return err; 346 return err;
340 347
341 err = -EINVAL; 348 err = -EINVAL;
342 if (tb[TCA_ROUTE4_CLASSID-1]) 349 if (tb[TCA_ROUTE4_TO]) {
343 if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < sizeof(u32))
344 goto errout;
345
346 if (tb[TCA_ROUTE4_TO-1]) {
347 if (new && handle & 0x8000) 350 if (new && handle & 0x8000)
348 goto errout; 351 goto errout;
349 if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < sizeof(u32)) 352 to = nla_get_u32(tb[TCA_ROUTE4_TO]);
350 goto errout;
351 to = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
352 if (to > 0xFF) 353 if (to > 0xFF)
353 goto errout; 354 goto errout;
354 nhandle = to; 355 nhandle = to;
355 } 356 }
356 357
357 if (tb[TCA_ROUTE4_FROM-1]) { 358 if (tb[TCA_ROUTE4_FROM]) {
358 if (tb[TCA_ROUTE4_IIF-1]) 359 if (tb[TCA_ROUTE4_IIF])
359 goto errout; 360 goto errout;
360 if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < sizeof(u32)) 361 id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
361 goto errout;
362 id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]);
363 if (id > 0xFF) 362 if (id > 0xFF)
364 goto errout; 363 goto errout;
365 nhandle |= id << 16; 364 nhandle |= id << 16;
366 } else if (tb[TCA_ROUTE4_IIF-1]) { 365 } else if (tb[TCA_ROUTE4_IIF]) {
367 if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < sizeof(u32)) 366 id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
368 goto errout;
369 id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
370 if (id > 0x7FFF) 367 if (id > 0x7FFF)
371 goto errout; 368 goto errout;
372 nhandle |= (id | 0x8000) << 16; 369 nhandle |= (id | 0x8000) << 16;
@@ -398,20 +395,20 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
398 } 395 }
399 396
400 tcf_tree_lock(tp); 397 tcf_tree_lock(tp);
401 if (tb[TCA_ROUTE4_TO-1]) 398 if (tb[TCA_ROUTE4_TO])
402 f->id = to; 399 f->id = to;
403 400
404 if (tb[TCA_ROUTE4_FROM-1]) 401 if (tb[TCA_ROUTE4_FROM])
405 f->id = to | id<<16; 402 f->id = to | id<<16;
406 else if (tb[TCA_ROUTE4_IIF-1]) 403 else if (tb[TCA_ROUTE4_IIF])
407 f->iif = id; 404 f->iif = id;
408 405
409 f->handle = nhandle; 406 f->handle = nhandle;
410 f->bkt = b; 407 f->bkt = b;
411 tcf_tree_unlock(tp); 408 tcf_tree_unlock(tp);
412 409
413 if (tb[TCA_ROUTE4_CLASSID-1]) { 410 if (tb[TCA_ROUTE4_CLASSID]) {
414 f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]); 411 f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
415 tcf_bind_filter(tp, &f->res, base); 412 tcf_bind_filter(tp, &f->res, base);
416 } 413 }
417 414
@@ -425,14 +422,14 @@ errout:
425 422
426static int route4_change(struct tcf_proto *tp, unsigned long base, 423static int route4_change(struct tcf_proto *tp, unsigned long base,
427 u32 handle, 424 u32 handle,
428 struct rtattr **tca, 425 struct nlattr **tca,
429 unsigned long *arg) 426 unsigned long *arg)
430{ 427{
431 struct route4_head *head = tp->root; 428 struct route4_head *head = tp->root;
432 struct route4_filter *f, *f1, **fp; 429 struct route4_filter *f, *f1, **fp;
433 struct route4_bucket *b; 430 struct route4_bucket *b;
434 struct rtattr *opt = tca[TCA_OPTIONS-1]; 431 struct nlattr *opt = tca[TCA_OPTIONS];
435 struct rtattr *tb[TCA_ROUTE4_MAX]; 432 struct nlattr *tb[TCA_ROUTE4_MAX + 1];
436 unsigned int h, th; 433 unsigned int h, th;
437 u32 old_handle = 0; 434 u32 old_handle = 0;
438 int err; 435 int err;
@@ -440,8 +437,9 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
440 if (opt == NULL) 437 if (opt == NULL)
441 return handle ? -EINVAL : 0; 438 return handle ? -EINVAL : 0;
442 439
443 if (rtattr_parse_nested(tb, TCA_ROUTE4_MAX, opt) < 0) 440 err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
444 return -EINVAL; 441 if (err < 0)
442 return err;
445 443
446 if ((f = (struct route4_filter*)*arg) != NULL) { 444 if ((f = (struct route4_filter*)*arg) != NULL) {
447 if (f->handle != handle && handle) 445 if (f->handle != handle && handle)
@@ -451,7 +449,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
451 old_handle = f->handle; 449 old_handle = f->handle;
452 450
453 err = route4_set_parms(tp, base, f, handle, head, tb, 451 err = route4_set_parms(tp, base, f, handle, head, tb,
454 tca[TCA_RATE-1], 0); 452 tca[TCA_RATE], 0);
455 if (err < 0) 453 if (err < 0)
456 return err; 454 return err;
457 455
@@ -474,7 +472,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
474 goto errout; 472 goto errout;
475 473
476 err = route4_set_parms(tp, base, f, handle, head, tb, 474 err = route4_set_parms(tp, base, f, handle, head, tb,
477 tca[TCA_RATE-1], 1); 475 tca[TCA_RATE], 1);
478 if (err < 0) 476 if (err < 0)
479 goto errout; 477 goto errout;
480 478
@@ -550,7 +548,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550{ 548{
551 struct route4_filter *f = (struct route4_filter*)fh; 549 struct route4_filter *f = (struct route4_filter*)fh;
552 unsigned char *b = skb_tail_pointer(skb); 550 unsigned char *b = skb_tail_pointer(skb);
553 struct rtattr *rta; 551 struct nlattr *nest;
554 u32 id; 552 u32 id;
555 553
556 if (f == NULL) 554 if (f == NULL)
@@ -558,40 +556,40 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
558 556
559 t->tcm_handle = f->handle; 557 t->tcm_handle = f->handle;
560 558
561 rta = (struct rtattr*)b; 559 nest = nla_nest_start(skb, TCA_OPTIONS);
562 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 560 if (nest == NULL)
561 goto nla_put_failure;
563 562
564 if (!(f->handle&0x8000)) { 563 if (!(f->handle&0x8000)) {
565 id = f->id&0xFF; 564 id = f->id&0xFF;
566 RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id); 565 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
567 } 566 }
568 if (f->handle&0x80000000) { 567 if (f->handle&0x80000000) {
569 if ((f->handle>>16) != 0xFFFF) 568 if ((f->handle>>16) != 0xFFFF)
570 RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif); 569 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
571 } else { 570 } else {
572 id = f->id>>16; 571 id = f->id>>16;
573 RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id); 572 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
574 } 573 }
575 if (f->res.classid) 574 if (f->res.classid)
576 RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid); 575 NLA_PUT_U32(skb, TCA_ROUTE4_CLASSID, f->res.classid);
577 576
578 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) 577 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
579 goto rtattr_failure; 578 goto nla_put_failure;
580 579
581 rta->rta_len = skb_tail_pointer(skb) - b; 580 nla_nest_end(skb, nest);
582 581
583 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) 582 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
584 goto rtattr_failure; 583 goto nla_put_failure;
585 584
586 return skb->len; 585 return skb->len;
587 586
588rtattr_failure: 587nla_put_failure:
589 nlmsg_trim(skb, b); 588 nlmsg_trim(skb, b);
590 return -1; 589 return -1;
591} 590}
592 591
593static struct tcf_proto_ops cls_route4_ops = { 592static struct tcf_proto_ops cls_route4_ops __read_mostly = {
594 .next = NULL,
595 .kind = "route", 593 .kind = "route",
596 .classify = route4_classify, 594 .classify = route4_classify,
597 .init = route4_init, 595 .init = route4_init,
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 22f9ede70e8f..7034ea4530e5 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -397,17 +397,26 @@ static u32 gen_tunnel(struct rsvp_head *data)
397 return 0; 397 return 0;
398} 398}
399 399
400static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
401 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
402 [TCA_RSVP_DST] = { .type = NLA_BINARY,
403 .len = RSVP_DST_LEN * sizeof(u32) },
404 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
405 .len = RSVP_DST_LEN * sizeof(u32) },
406 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
407};
408
400static int rsvp_change(struct tcf_proto *tp, unsigned long base, 409static int rsvp_change(struct tcf_proto *tp, unsigned long base,
401 u32 handle, 410 u32 handle,
402 struct rtattr **tca, 411 struct nlattr **tca,
403 unsigned long *arg) 412 unsigned long *arg)
404{ 413{
405 struct rsvp_head *data = tp->root; 414 struct rsvp_head *data = tp->root;
406 struct rsvp_filter *f, **fp; 415 struct rsvp_filter *f, **fp;
407 struct rsvp_session *s, **sp; 416 struct rsvp_session *s, **sp;
408 struct tc_rsvp_pinfo *pinfo = NULL; 417 struct tc_rsvp_pinfo *pinfo = NULL;
409 struct rtattr *opt = tca[TCA_OPTIONS-1]; 418 struct nlattr *opt = tca[TCA_OPTIONS-1];
410 struct rtattr *tb[TCA_RSVP_MAX]; 419 struct nlattr *tb[TCA_RSVP_MAX + 1];
411 struct tcf_exts e; 420 struct tcf_exts e;
412 unsigned h1, h2; 421 unsigned h1, h2;
413 __be32 *dst; 422 __be32 *dst;
@@ -416,8 +425,9 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
416 if (opt == NULL) 425 if (opt == NULL)
417 return handle ? -EINVAL : 0; 426 return handle ? -EINVAL : 0;
418 427
419 if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0) 428 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
420 return -EINVAL; 429 if (err < 0)
430 return err;
421 431
422 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); 432 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
423 if (err < 0) 433 if (err < 0)
@@ -429,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
429 if (f->handle != handle && handle) 439 if (f->handle != handle && handle)
430 goto errout2; 440 goto errout2;
431 if (tb[TCA_RSVP_CLASSID-1]) { 441 if (tb[TCA_RSVP_CLASSID-1]) {
432 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); 442 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
433 tcf_bind_filter(tp, &f->res, base); 443 tcf_bind_filter(tp, &f->res, base);
434 } 444 }
435 445
@@ -451,31 +461,18 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
451 461
452 h2 = 16; 462 h2 = 16;
453 if (tb[TCA_RSVP_SRC-1]) { 463 if (tb[TCA_RSVP_SRC-1]) {
454 err = -EINVAL; 464 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
455 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
456 goto errout;
457 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
458 h2 = hash_src(f->src); 465 h2 = hash_src(f->src);
459 } 466 }
460 if (tb[TCA_RSVP_PINFO-1]) { 467 if (tb[TCA_RSVP_PINFO-1]) {
461 err = -EINVAL; 468 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
462 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
463 goto errout;
464 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
465 f->spi = pinfo->spi; 469 f->spi = pinfo->spi;
466 f->tunnelhdr = pinfo->tunnelhdr; 470 f->tunnelhdr = pinfo->tunnelhdr;
467 } 471 }
468 if (tb[TCA_RSVP_CLASSID-1]) { 472 if (tb[TCA_RSVP_CLASSID-1])
469 err = -EINVAL; 473 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
470 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
471 goto errout;
472 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
473 }
474 474
475 err = -EINVAL; 475 dst = nla_data(tb[TCA_RSVP_DST-1]);
476 if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
477 goto errout;
478 dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
479 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); 476 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
480 477
481 err = -ENOMEM; 478 err = -ENOMEM;
@@ -594,7 +591,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
594 struct rsvp_filter *f = (struct rsvp_filter*)fh; 591 struct rsvp_filter *f = (struct rsvp_filter*)fh;
595 struct rsvp_session *s; 592 struct rsvp_session *s;
596 unsigned char *b = skb_tail_pointer(skb); 593 unsigned char *b = skb_tail_pointer(skb);
597 struct rtattr *rta; 594 struct nlattr *nest;
598 struct tc_rsvp_pinfo pinfo; 595 struct tc_rsvp_pinfo pinfo;
599 596
600 if (f == NULL) 597 if (f == NULL)
@@ -603,33 +600,33 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
603 600
604 t->tcm_handle = f->handle; 601 t->tcm_handle = f->handle;
605 602
603 nest = nla_nest_start(skb, TCA_OPTIONS);
604 if (nest == NULL)
605 goto nla_put_failure;
606 606
607 rta = (struct rtattr*)b; 607 NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
608 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
609
610 RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
611 pinfo.dpi = s->dpi; 608 pinfo.dpi = s->dpi;
612 pinfo.spi = f->spi; 609 pinfo.spi = f->spi;
613 pinfo.protocol = s->protocol; 610 pinfo.protocol = s->protocol;
614 pinfo.tunnelid = s->tunnelid; 611 pinfo.tunnelid = s->tunnelid;
615 pinfo.tunnelhdr = f->tunnelhdr; 612 pinfo.tunnelhdr = f->tunnelhdr;
616 pinfo.pad = 0; 613 pinfo.pad = 0;
617 RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 614 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
618 if (f->res.classid) 615 if (f->res.classid)
619 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid); 616 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
620 if (((f->handle>>8)&0xFF) != 16) 617 if (((f->handle>>8)&0xFF) != 16)
621 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 618 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
622 619
623 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 620 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
624 goto rtattr_failure; 621 goto nla_put_failure;
625 622
626 rta->rta_len = skb_tail_pointer(skb) - b; 623 nla_nest_end(skb, nest);
627 624
628 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) 625 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
629 goto rtattr_failure; 626 goto nla_put_failure;
630 return skb->len; 627 return skb->len;
631 628
632rtattr_failure: 629nla_put_failure:
633 nlmsg_trim(skb, b); 630 nlmsg_trim(skb, b);
634 return -1; 631 return -1;
635} 632}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 2314820a080a..7a7bff5ded24 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -29,19 +29,6 @@
29#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ 29#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
30 30
31 31
32#if 1 /* control */
33#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
34#else
35#define DPRINTK(format,args...)
36#endif
37
38#if 0 /* data */
39#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
40#else
41#define D2PRINTK(format,args...)
42#endif
43
44
45#define PRIV(tp) ((struct tcindex_data *) (tp)->root) 32#define PRIV(tp) ((struct tcindex_data *) (tp)->root)
46 33
47 34
@@ -68,7 +55,7 @@ struct tcindex_data {
68 int fall_through; /* 0: only classify if explicit match */ 55 int fall_through; /* 0: only classify if explicit match */
69}; 56};
70 57
71static struct tcf_ext_map tcindex_ext_map = { 58static const struct tcf_ext_map tcindex_ext_map = {
72 .police = TCA_TCINDEX_POLICE, 59 .police = TCA_TCINDEX_POLICE,
73 .action = TCA_TCINDEX_ACT 60 .action = TCA_TCINDEX_ACT
74}; 61};
@@ -104,7 +91,8 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
104 struct tcindex_filter_result *f; 91 struct tcindex_filter_result *f;
105 int key = (skb->tc_index & p->mask) >> p->shift; 92 int key = (skb->tc_index & p->mask) >> p->shift;
106 93
107 D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p); 94 pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
95 skb, tp, res, p);
108 96
109 f = tcindex_lookup(p, key); 97 f = tcindex_lookup(p, key);
110 if (!f) { 98 if (!f) {
@@ -112,11 +100,11 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
112 return -1; 100 return -1;
113 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); 101 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
114 res->class = 0; 102 res->class = 0;
115 D2PRINTK("alg 0x%x\n",res->classid); 103 pr_debug("alg 0x%x\n", res->classid);
116 return 0; 104 return 0;
117 } 105 }
118 *res = f->res; 106 *res = f->res;
119 D2PRINTK("map 0x%x\n",res->classid); 107 pr_debug("map 0x%x\n", res->classid);
120 108
121 return tcf_exts_exec(skb, &f->exts, res); 109 return tcf_exts_exec(skb, &f->exts, res);
122} 110}
@@ -127,7 +115,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
127 struct tcindex_data *p = PRIV(tp); 115 struct tcindex_data *p = PRIV(tp);
128 struct tcindex_filter_result *r; 116 struct tcindex_filter_result *r;
129 117
130 DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle); 118 pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
131 if (p->perfect && handle >= p->alloc_hash) 119 if (p->perfect && handle >= p->alloc_hash)
132 return 0; 120 return 0;
133 r = tcindex_lookup(p, handle); 121 r = tcindex_lookup(p, handle);
@@ -137,7 +125,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
137 125
138static void tcindex_put(struct tcf_proto *tp, unsigned long f) 126static void tcindex_put(struct tcf_proto *tp, unsigned long f)
139{ 127{
140 DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f); 128 pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f);
141} 129}
142 130
143 131
@@ -145,8 +133,8 @@ static int tcindex_init(struct tcf_proto *tp)
145{ 133{
146 struct tcindex_data *p; 134 struct tcindex_data *p;
147 135
148 DPRINTK("tcindex_init(tp %p)\n",tp); 136 pr_debug("tcindex_init(tp %p)\n", tp);
149 p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL); 137 p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
150 if (!p) 138 if (!p)
151 return -ENOMEM; 139 return -ENOMEM;
152 140
@@ -166,7 +154,7 @@ __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
166 struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; 154 struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
167 struct tcindex_filter *f = NULL; 155 struct tcindex_filter *f = NULL;
168 156
169 DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f); 157 pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
170 if (p->perfect) { 158 if (p->perfect) {
171 if (!r->res.class) 159 if (!r->res.class)
172 return -ENOENT; 160 return -ENOENT;
@@ -205,10 +193,18 @@ valid_perfect_hash(struct tcindex_data *p)
205 return p->hash > (p->mask >> p->shift); 193 return p->hash > (p->mask >> p->shift);
206} 194}
207 195
196static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
197 [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
198 [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
199 [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
200 [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
201 [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
202};
203
208static int 204static int
209tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, 205tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
210 struct tcindex_data *p, struct tcindex_filter_result *r, 206 struct tcindex_data *p, struct tcindex_filter_result *r,
211 struct rtattr **tb, struct rtattr *est) 207 struct nlattr **tb, struct nlattr *est)
212{ 208{
213 int err, balloc = 0; 209 int err, balloc = 0;
214 struct tcindex_filter_result new_filter_result, *old_r = r; 210 struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -229,24 +225,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
229 else 225 else
230 memset(&cr, 0, sizeof(cr)); 226 memset(&cr, 0, sizeof(cr));
231 227
232 err = -EINVAL; 228 if (tb[TCA_TCINDEX_HASH])
233 if (tb[TCA_TCINDEX_HASH-1]) { 229 cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
234 if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32))
235 goto errout;
236 cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
237 }
238 230
239 if (tb[TCA_TCINDEX_MASK-1]) { 231 if (tb[TCA_TCINDEX_MASK])
240 if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16)) 232 cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
241 goto errout;
242 cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
243 }
244 233
245 if (tb[TCA_TCINDEX_SHIFT-1]) { 234 if (tb[TCA_TCINDEX_SHIFT])
246 if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int)) 235 cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
247 goto errout;
248 cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
249 }
250 236
251 err = -EBUSY; 237 err = -EBUSY;
252 /* Hash already allocated, make sure that we still meet the 238 /* Hash already allocated, make sure that we still meet the
@@ -260,12 +246,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
260 goto errout; 246 goto errout;
261 247
262 err = -EINVAL; 248 err = -EINVAL;
263 if (tb[TCA_TCINDEX_FALL_THROUGH-1]) { 249 if (tb[TCA_TCINDEX_FALL_THROUGH])
264 if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32)) 250 cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
265 goto errout;
266 cp.fall_through =
267 *(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
268 }
269 251
270 if (!cp.hash) { 252 if (!cp.hash) {
271 /* Hash not specified, use perfect hash if the upper limit 253 /* Hash not specified, use perfect hash if the upper limit
@@ -316,8 +298,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
316 goto errout_alloc; 298 goto errout_alloc;
317 } 299 }
318 300
319 if (tb[TCA_TCINDEX_CLASSID-1]) { 301 if (tb[TCA_TCINDEX_CLASSID]) {
320 cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]); 302 cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
321 tcf_bind_filter(tp, &cr.res, base); 303 tcf_bind_filter(tp, &cr.res, base);
322 } 304 }
323 305
@@ -356,34 +338,36 @@ errout:
356 338
357static int 339static int
358tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle, 340tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
359 struct rtattr **tca, unsigned long *arg) 341 struct nlattr **tca, unsigned long *arg)
360{ 342{
361 struct rtattr *opt = tca[TCA_OPTIONS-1]; 343 struct nlattr *opt = tca[TCA_OPTIONS];
362 struct rtattr *tb[TCA_TCINDEX_MAX]; 344 struct nlattr *tb[TCA_TCINDEX_MAX + 1];
363 struct tcindex_data *p = PRIV(tp); 345 struct tcindex_data *p = PRIV(tp);
364 struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; 346 struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
347 int err;
365 348
366 DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," 349 pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
367 "p %p,r %p,*arg 0x%lx\n", 350 "p %p,r %p,*arg 0x%lx\n",
368 tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); 351 tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
369 352
370 if (!opt) 353 if (!opt)
371 return 0; 354 return 0;
372 355
373 if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0) 356 err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
374 return -EINVAL; 357 if (err < 0)
358 return err;
375 359
376 return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]); 360 return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
377} 361}
378 362
379 363
380static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) 364static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
381{ 365{
382 struct tcindex_data *p = PRIV(tp); 366 struct tcindex_data *p = PRIV(tp);
383 struct tcindex_filter *f,*next; 367 struct tcindex_filter *f, *next;
384 int i; 368 int i;
385 369
386 DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p); 370 pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
387 if (p->perfect) { 371 if (p->perfect) {
388 for (i = 0; i < p->hash; i++) { 372 for (i = 0; i < p->hash; i++) {
389 if (!p->perfect[i].res.class) 373 if (!p->perfect[i].res.class)
@@ -405,7 +389,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
405 for (f = p->h[i]; f; f = next) { 389 for (f = p->h[i]; f; f = next) {
406 next = f->next; 390 next = f->next;
407 if (walker->count >= walker->skip) { 391 if (walker->count >= walker->skip) {
408 if (walker->fn(tp,(unsigned long) &f->result, 392 if (walker->fn(tp, (unsigned long) &f->result,
409 walker) < 0) { 393 walker) < 0) {
410 walker->stop = 1; 394 walker->stop = 1;
411 return; 395 return;
@@ -429,11 +413,11 @@ static void tcindex_destroy(struct tcf_proto *tp)
429 struct tcindex_data *p = PRIV(tp); 413 struct tcindex_data *p = PRIV(tp);
430 struct tcf_walker walker; 414 struct tcf_walker walker;
431 415
432 DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p); 416 pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
433 walker.count = 0; 417 walker.count = 0;
434 walker.skip = 0; 418 walker.skip = 0;
435 walker.fn = &tcindex_destroy_element; 419 walker.fn = &tcindex_destroy_element;
436 tcindex_walk(tp,&walker); 420 tcindex_walk(tp, &walker);
437 kfree(p->perfect); 421 kfree(p->perfect);
438 kfree(p->h); 422 kfree(p->h);
439 kfree(p); 423 kfree(p);
@@ -447,21 +431,23 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
447 struct tcindex_data *p = PRIV(tp); 431 struct tcindex_data *p = PRIV(tp);
448 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; 432 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
449 unsigned char *b = skb_tail_pointer(skb); 433 unsigned char *b = skb_tail_pointer(skb);
450 struct rtattr *rta; 434 struct nlattr *nest;
435
436 pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
437 tp, fh, skb, t, p, r, b);
438 pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
439
440 nest = nla_nest_start(skb, TCA_OPTIONS);
441 if (nest == NULL)
442 goto nla_put_failure;
451 443
452 DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
453 tp,fh,skb,t,p,r,b);
454 DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
455 rta = (struct rtattr *) b;
456 RTA_PUT(skb,TCA_OPTIONS,0,NULL);
457 if (!fh) { 444 if (!fh) {
458 t->tcm_handle = ~0; /* whatever ... */ 445 t->tcm_handle = ~0; /* whatever ... */
459 RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash); 446 NLA_PUT_U32(skb, TCA_TCINDEX_HASH, p->hash);
460 RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask); 447 NLA_PUT_U16(skb, TCA_TCINDEX_MASK, p->mask);
461 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); 448 NLA_PUT_U32(skb, TCA_TCINDEX_SHIFT, p->shift);
462 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), 449 NLA_PUT_U32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through);
463 &p->fall_through); 450 nla_nest_end(skb, nest);
464 rta->rta_len = skb_tail_pointer(skb) - b;
465 } else { 451 } else {
466 if (p->perfect) { 452 if (p->perfect) {
467 t->tcm_handle = r-p->perfect; 453 t->tcm_handle = r-p->perfect;
@@ -478,27 +464,26 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
478 } 464 }
479 } 465 }
480 } 466 }
481 DPRINTK("handle = %d\n",t->tcm_handle); 467 pr_debug("handle = %d\n", t->tcm_handle);
482 if (r->res.class) 468 if (r->res.class)
483 RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid); 469 NLA_PUT_U32(skb, TCA_TCINDEX_CLASSID, r->res.classid);
484 470
485 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) 471 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
486 goto rtattr_failure; 472 goto nla_put_failure;
487 rta->rta_len = skb_tail_pointer(skb) - b; 473 nla_nest_end(skb, nest);
488 474
489 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) 475 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
490 goto rtattr_failure; 476 goto nla_put_failure;
491 } 477 }
492 478
493 return skb->len; 479 return skb->len;
494 480
495rtattr_failure: 481nla_put_failure:
496 nlmsg_trim(skb, b); 482 nlmsg_trim(skb, b);
497 return -1; 483 return -1;
498} 484}
499 485
500static struct tcf_proto_ops cls_tcindex_ops = { 486static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
501 .next = NULL,
502 .kind = "tcindex", 487 .kind = "tcindex",
503 .classify = tcindex_classify, 488 .classify = tcindex_classify,
504 .init = tcindex_init, 489 .init = tcindex_init,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 9e98c6e567dd..b18fa95ef248 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -82,7 +82,7 @@ struct tc_u_common
82 u32 hgenerator; 82 u32 hgenerator;
83}; 83};
84 84
85static struct tcf_ext_map u32_ext_map = { 85static const struct tcf_ext_map u32_ext_map = {
86 .action = TCA_U32_ACT, 86 .action = TCA_U32_ACT,
87 .police = TCA_U32_POLICE 87 .police = TCA_U32_POLICE
88}; 88};
@@ -91,7 +91,7 @@ static struct tc_u_common *u32_list;
91 91
92static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift) 92static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift)
93{ 93{
94 unsigned h = (key & sel->hmask)>>fshift; 94 unsigned h = ntohl(key & sel->hmask)>>fshift;
95 95
96 return h; 96 return h;
97} 97}
@@ -460,10 +460,20 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
460 return handle|(i>0xFFF ? 0xFFF : i); 460 return handle|(i>0xFFF ? 0xFFF : i);
461} 461}
462 462
463static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
464 [TCA_U32_CLASSID] = { .type = NLA_U32 },
465 [TCA_U32_HASH] = { .type = NLA_U32 },
466 [TCA_U32_LINK] = { .type = NLA_U32 },
467 [TCA_U32_DIVISOR] = { .type = NLA_U32 },
468 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
469 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
470 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
471};
472
463static int u32_set_parms(struct tcf_proto *tp, unsigned long base, 473static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
464 struct tc_u_hnode *ht, 474 struct tc_u_hnode *ht,
465 struct tc_u_knode *n, struct rtattr **tb, 475 struct tc_u_knode *n, struct nlattr **tb,
466 struct rtattr *est) 476 struct nlattr *est)
467{ 477{
468 int err; 478 int err;
469 struct tcf_exts e; 479 struct tcf_exts e;
@@ -473,8 +483,8 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
473 return err; 483 return err;
474 484
475 err = -EINVAL; 485 err = -EINVAL;
476 if (tb[TCA_U32_LINK-1]) { 486 if (tb[TCA_U32_LINK]) {
477 u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]); 487 u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
478 struct tc_u_hnode *ht_down = NULL; 488 struct tc_u_hnode *ht_down = NULL;
479 489
480 if (TC_U32_KEY(handle)) 490 if (TC_U32_KEY(handle))
@@ -495,14 +505,14 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
495 if (ht_down) 505 if (ht_down)
496 ht_down->refcnt--; 506 ht_down->refcnt--;
497 } 507 }
498 if (tb[TCA_U32_CLASSID-1]) { 508 if (tb[TCA_U32_CLASSID]) {
499 n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]); 509 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
500 tcf_bind_filter(tp, &n->res, base); 510 tcf_bind_filter(tp, &n->res, base);
501 } 511 }
502 512
503#ifdef CONFIG_NET_CLS_IND 513#ifdef CONFIG_NET_CLS_IND
504 if (tb[TCA_U32_INDEV-1]) { 514 if (tb[TCA_U32_INDEV]) {
505 err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); 515 err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]);
506 if (err < 0) 516 if (err < 0)
507 goto errout; 517 goto errout;
508 } 518 }
@@ -516,33 +526,34 @@ errout:
516} 526}
517 527
518static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, 528static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
519 struct rtattr **tca, 529 struct nlattr **tca,
520 unsigned long *arg) 530 unsigned long *arg)
521{ 531{
522 struct tc_u_common *tp_c = tp->data; 532 struct tc_u_common *tp_c = tp->data;
523 struct tc_u_hnode *ht; 533 struct tc_u_hnode *ht;
524 struct tc_u_knode *n; 534 struct tc_u_knode *n;
525 struct tc_u32_sel *s; 535 struct tc_u32_sel *s;
526 struct rtattr *opt = tca[TCA_OPTIONS-1]; 536 struct nlattr *opt = tca[TCA_OPTIONS];
527 struct rtattr *tb[TCA_U32_MAX]; 537 struct nlattr *tb[TCA_U32_MAX + 1];
528 u32 htid; 538 u32 htid;
529 int err; 539 int err;
530 540
531 if (opt == NULL) 541 if (opt == NULL)
532 return handle ? -EINVAL : 0; 542 return handle ? -EINVAL : 0;
533 543
534 if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0) 544 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
535 return -EINVAL; 545 if (err < 0)
546 return err;
536 547
537 if ((n = (struct tc_u_knode*)*arg) != NULL) { 548 if ((n = (struct tc_u_knode*)*arg) != NULL) {
538 if (TC_U32_KEY(n->handle) == 0) 549 if (TC_U32_KEY(n->handle) == 0)
539 return -EINVAL; 550 return -EINVAL;
540 551
541 return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]); 552 return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
542 } 553 }
543 554
544 if (tb[TCA_U32_DIVISOR-1]) { 555 if (tb[TCA_U32_DIVISOR]) {
545 unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]); 556 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
546 557
547 if (--divisor > 0x100) 558 if (--divisor > 0x100)
548 return -EINVAL; 559 return -EINVAL;
@@ -567,8 +578,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
567 return 0; 578 return 0;
568 } 579 }
569 580
570 if (tb[TCA_U32_HASH-1]) { 581 if (tb[TCA_U32_HASH]) {
571 htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]); 582 htid = nla_get_u32(tb[TCA_U32_HASH]);
572 if (TC_U32_HTID(htid) == TC_U32_ROOT) { 583 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
573 ht = tp->root; 584 ht = tp->root;
574 htid = ht->handle; 585 htid = ht->handle;
@@ -592,11 +603,10 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
592 } else 603 } else
593 handle = gen_new_kid(ht, htid); 604 handle = gen_new_kid(ht, htid);
594 605
595 if (tb[TCA_U32_SEL-1] == NULL || 606 if (tb[TCA_U32_SEL] == NULL)
596 RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
597 return -EINVAL; 607 return -EINVAL;
598 608
599 s = RTA_DATA(tb[TCA_U32_SEL-1]); 609 s = nla_data(tb[TCA_U32_SEL]);
600 610
601 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 611 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
602 if (n == NULL) 612 if (n == NULL)
@@ -613,36 +623,19 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
613 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); 623 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
614 n->ht_up = ht; 624 n->ht_up = ht;
615 n->handle = handle; 625 n->handle = handle;
616{ 626 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
617 u8 i = 0;
618 u32 mask = s->hmask;
619 if (mask) {
620 while (!(mask & 1)) {
621 i++;
622 mask>>=1;
623 }
624 }
625 n->fshift = i;
626}
627 627
628#ifdef CONFIG_CLS_U32_MARK 628#ifdef CONFIG_CLS_U32_MARK
629 if (tb[TCA_U32_MARK-1]) { 629 if (tb[TCA_U32_MARK]) {
630 struct tc_u32_mark *mark; 630 struct tc_u32_mark *mark;
631 631
632 if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) { 632 mark = nla_data(tb[TCA_U32_MARK]);
633#ifdef CONFIG_CLS_U32_PERF
634 kfree(n->pf);
635#endif
636 kfree(n);
637 return -EINVAL;
638 }
639 mark = RTA_DATA(tb[TCA_U32_MARK-1]);
640 memcpy(&n->mark, mark, sizeof(struct tc_u32_mark)); 633 memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
641 n->mark.success = 0; 634 n->mark.success = 0;
642 } 635 }
643#endif 636#endif
644 637
645 err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]); 638 err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
646 if (err == 0) { 639 if (err == 0) {
647 struct tc_u_knode **ins; 640 struct tc_u_knode **ins;
648 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) 641 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
@@ -703,66 +696,66 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
703 struct sk_buff *skb, struct tcmsg *t) 696 struct sk_buff *skb, struct tcmsg *t)
704{ 697{
705 struct tc_u_knode *n = (struct tc_u_knode*)fh; 698 struct tc_u_knode *n = (struct tc_u_knode*)fh;
706 unsigned char *b = skb_tail_pointer(skb); 699 struct nlattr *nest;
707 struct rtattr *rta;
708 700
709 if (n == NULL) 701 if (n == NULL)
710 return skb->len; 702 return skb->len;
711 703
712 t->tcm_handle = n->handle; 704 t->tcm_handle = n->handle;
713 705
714 rta = (struct rtattr*)b; 706 nest = nla_nest_start(skb, TCA_OPTIONS);
715 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 707 if (nest == NULL)
708 goto nla_put_failure;
716 709
717 if (TC_U32_KEY(n->handle) == 0) { 710 if (TC_U32_KEY(n->handle) == 0) {
718 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 711 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
719 u32 divisor = ht->divisor+1; 712 u32 divisor = ht->divisor+1;
720 RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor); 713 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
721 } else { 714 } else {
722 RTA_PUT(skb, TCA_U32_SEL, 715 NLA_PUT(skb, TCA_U32_SEL,
723 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), 716 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
724 &n->sel); 717 &n->sel);
725 if (n->ht_up) { 718 if (n->ht_up) {
726 u32 htid = n->handle & 0xFFFFF000; 719 u32 htid = n->handle & 0xFFFFF000;
727 RTA_PUT(skb, TCA_U32_HASH, 4, &htid); 720 NLA_PUT_U32(skb, TCA_U32_HASH, htid);
728 } 721 }
729 if (n->res.classid) 722 if (n->res.classid)
730 RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid); 723 NLA_PUT_U32(skb, TCA_U32_CLASSID, n->res.classid);
731 if (n->ht_down) 724 if (n->ht_down)
732 RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle); 725 NLA_PUT_U32(skb, TCA_U32_LINK, n->ht_down->handle);
733 726
734#ifdef CONFIG_CLS_U32_MARK 727#ifdef CONFIG_CLS_U32_MARK
735 if (n->mark.val || n->mark.mask) 728 if (n->mark.val || n->mark.mask)
736 RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark); 729 NLA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
737#endif 730#endif
738 731
739 if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0) 732 if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
740 goto rtattr_failure; 733 goto nla_put_failure;
741 734
742#ifdef CONFIG_NET_CLS_IND 735#ifdef CONFIG_NET_CLS_IND
743 if(strlen(n->indev)) 736 if(strlen(n->indev))
744 RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev); 737 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
745#endif 738#endif
746#ifdef CONFIG_CLS_U32_PERF 739#ifdef CONFIG_CLS_U32_PERF
747 RTA_PUT(skb, TCA_U32_PCNT, 740 NLA_PUT(skb, TCA_U32_PCNT,
748 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), 741 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
749 n->pf); 742 n->pf);
750#endif 743#endif
751 } 744 }
752 745
753 rta->rta_len = skb_tail_pointer(skb) - b; 746 nla_nest_end(skb, nest);
747
754 if (TC_U32_KEY(n->handle)) 748 if (TC_U32_KEY(n->handle))
755 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) 749 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
756 goto rtattr_failure; 750 goto nla_put_failure;
757 return skb->len; 751 return skb->len;
758 752
759rtattr_failure: 753nla_put_failure:
760 nlmsg_trim(skb, b); 754 nla_nest_cancel(skb, nest);
761 return -1; 755 return -1;
762} 756}
763 757
764static struct tcf_proto_ops cls_u32_ops = { 758static struct tcf_proto_ops cls_u32_ops __read_mostly = {
765 .next = NULL,
766 .kind = "u32", 759 .kind = "u32",
767 .classify = u32_classify, 760 .classify = u32_classify,
768 .init = u32_init, 761 .init = u32_init,
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ceda8890ab0e..a1e5619b1876 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -542,11 +542,11 @@ static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
542 return r; 542 return r;
543} 543}
544 544
545static int meta_var_change(struct meta_value *dst, struct rtattr *rta) 545static int meta_var_change(struct meta_value *dst, struct nlattr *nla)
546{ 546{
547 int len = RTA_PAYLOAD(rta); 547 int len = nla_len(nla);
548 548
549 dst->val = (unsigned long)kmemdup(RTA_DATA(rta), len, GFP_KERNEL); 549 dst->val = (unsigned long)kmemdup(nla_data(nla), len, GFP_KERNEL);
550 if (dst->val == 0UL) 550 if (dst->val == 0UL)
551 return -ENOMEM; 551 return -ENOMEM;
552 dst->len = len; 552 dst->len = len;
@@ -570,10 +570,10 @@ static void meta_var_apply_extras(struct meta_value *v,
570static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv) 570static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
571{ 571{
572 if (v->val && v->len) 572 if (v->val && v->len)
573 RTA_PUT(skb, tlv, v->len, (void *) v->val); 573 NLA_PUT(skb, tlv, v->len, (void *) v->val);
574 return 0; 574 return 0;
575 575
576rtattr_failure: 576nla_put_failure:
577 return -1; 577 return -1;
578} 578}
579 579
@@ -594,13 +594,13 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
594 return 1; 594 return 1;
595} 595}
596 596
597static int meta_int_change(struct meta_value *dst, struct rtattr *rta) 597static int meta_int_change(struct meta_value *dst, struct nlattr *nla)
598{ 598{
599 if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) { 599 if (nla_len(nla) >= sizeof(unsigned long)) {
600 dst->val = *(unsigned long *) RTA_DATA(rta); 600 dst->val = *(unsigned long *) nla_data(nla);
601 dst->len = sizeof(unsigned long); 601 dst->len = sizeof(unsigned long);
602 } else if (RTA_PAYLOAD(rta) == sizeof(u32)) { 602 } else if (nla_len(nla) == sizeof(u32)) {
603 dst->val = *(u32 *) RTA_DATA(rta); 603 dst->val = nla_get_u32(nla);
604 dst->len = sizeof(u32); 604 dst->len = sizeof(u32);
605 } else 605 } else
606 return -EINVAL; 606 return -EINVAL;
@@ -621,15 +621,14 @@ static void meta_int_apply_extras(struct meta_value *v,
621static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) 621static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
622{ 622{
623 if (v->len == sizeof(unsigned long)) 623 if (v->len == sizeof(unsigned long))
624 RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 624 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
625 else if (v->len == sizeof(u32)) { 625 else if (v->len == sizeof(u32)) {
626 u32 d = v->val; 626 NLA_PUT_U32(skb, tlv, v->val);
627 RTA_PUT(skb, tlv, sizeof(d), &d);
628 } 627 }
629 628
630 return 0; 629 return 0;
631 630
632rtattr_failure: 631nla_put_failure:
633 return -1; 632 return -1;
634} 633}
635 634
@@ -641,7 +640,7 @@ struct meta_type_ops
641{ 640{
642 void (*destroy)(struct meta_value *); 641 void (*destroy)(struct meta_value *);
643 int (*compare)(struct meta_obj *, struct meta_obj *); 642 int (*compare)(struct meta_obj *, struct meta_obj *);
644 int (*change)(struct meta_value *, struct rtattr *); 643 int (*change)(struct meta_value *, struct nlattr *);
645 void (*apply_extras)(struct meta_value *, struct meta_obj *); 644 void (*apply_extras)(struct meta_value *, struct meta_obj *);
646 int (*dump)(struct sk_buff *, struct meta_value *, int); 645 int (*dump)(struct sk_buff *, struct meta_value *, int);
647}; 646};
@@ -729,13 +728,13 @@ static inline void meta_delete(struct meta_match *meta)
729 kfree(meta); 728 kfree(meta);
730} 729}
731 730
732static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta) 731static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
733{ 732{
734 if (rta) { 733 if (nla) {
735 if (RTA_PAYLOAD(rta) == 0) 734 if (nla_len(nla) == 0)
736 return -EINVAL; 735 return -EINVAL;
737 736
738 return meta_type_ops(dst)->change(dst, rta); 737 return meta_type_ops(dst)->change(dst, nla);
739 } 738 }
740 739
741 return 0; 740 return 0;
@@ -746,21 +745,26 @@ static inline int meta_is_supported(struct meta_value *val)
746 return (!meta_id(val) || meta_ops(val)->get); 745 return (!meta_id(val) || meta_ops(val)->get);
747} 746}
748 747
748static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
749 [TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) },
750};
751
749static int em_meta_change(struct tcf_proto *tp, void *data, int len, 752static int em_meta_change(struct tcf_proto *tp, void *data, int len,
750 struct tcf_ematch *m) 753 struct tcf_ematch *m)
751{ 754{
752 int err = -EINVAL; 755 int err;
753 struct rtattr *tb[TCA_EM_META_MAX]; 756 struct nlattr *tb[TCA_EM_META_MAX + 1];
754 struct tcf_meta_hdr *hdr; 757 struct tcf_meta_hdr *hdr;
755 struct meta_match *meta = NULL; 758 struct meta_match *meta = NULL;
756 759
757 if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0) 760 err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
761 if (err < 0)
758 goto errout; 762 goto errout;
759 763
760 if (tb[TCA_EM_META_HDR-1] == NULL || 764 err = -EINVAL;
761 RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr)) 765 if (tb[TCA_EM_META_HDR] == NULL)
762 goto errout; 766 goto errout;
763 hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]); 767 hdr = nla_data(tb[TCA_EM_META_HDR]);
764 768
765 if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) || 769 if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
766 TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX || 770 TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
@@ -781,8 +785,8 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
781 goto errout; 785 goto errout;
782 } 786 }
783 787
784 if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 || 788 if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE]) < 0 ||
785 meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0) 789 meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE]) < 0)
786 goto errout; 790 goto errout;
787 791
788 m->datalen = sizeof(*meta); 792 m->datalen = sizeof(*meta);
@@ -811,16 +815,16 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
811 memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left)); 815 memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
812 memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right)); 816 memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
813 817
814 RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr); 818 NLA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
815 819
816 ops = meta_type_ops(&meta->lvalue); 820 ops = meta_type_ops(&meta->lvalue);
817 if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 || 821 if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
818 ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0) 822 ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
819 goto rtattr_failure; 823 goto nla_put_failure;
820 824
821 return 0; 825 return 0;
822 826
823rtattr_failure: 827nla_put_failure:
824 return -1; 828 return -1;
825} 829}
826 830
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index d5cd86efb7d0..853c5ead87fd 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -118,11 +118,14 @@ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
118 conf.pattern_len = textsearch_get_pattern_len(tm->config); 118 conf.pattern_len = textsearch_get_pattern_len(tm->config);
119 conf.pad = 0; 119 conf.pad = 0;
120 120
121 RTA_PUT_NOHDR(skb, sizeof(conf), &conf); 121 if (nla_put_nohdr(skb, sizeof(conf), &conf) < 0)
122 RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config)); 122 goto nla_put_failure;
123 if (nla_append(skb, conf.pattern_len,
124 textsearch_get_pattern(tm->config)) < 0)
125 goto nla_put_failure;
123 return 0; 126 return 0;
124 127
125rtattr_failure: 128nla_put_failure:
126 return -1; 129 return -1;
127} 130}
128 131
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index f3a104e323bd..74ff918455a2 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -141,6 +141,7 @@ errout:
141 write_unlock(&ematch_mod_lock); 141 write_unlock(&ematch_mod_lock);
142 return err; 142 return err;
143} 143}
144EXPORT_SYMBOL(tcf_em_register);
144 145
145/** 146/**
146 * tcf_em_unregister - unregster and extended match 147 * tcf_em_unregister - unregster and extended match
@@ -171,6 +172,7 @@ out:
171 write_unlock(&ematch_mod_lock); 172 write_unlock(&ematch_mod_lock);
172 return err; 173 return err;
173} 174}
175EXPORT_SYMBOL(tcf_em_unregister);
174 176
175static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 177static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
176 int index) 178 int index)
@@ -181,11 +183,11 @@ static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
181 183
182static int tcf_em_validate(struct tcf_proto *tp, 184static int tcf_em_validate(struct tcf_proto *tp,
183 struct tcf_ematch_tree_hdr *tree_hdr, 185 struct tcf_ematch_tree_hdr *tree_hdr,
184 struct tcf_ematch *em, struct rtattr *rta, int idx) 186 struct tcf_ematch *em, struct nlattr *nla, int idx)
185{ 187{
186 int err = -EINVAL; 188 int err = -EINVAL;
187 struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta); 189 struct tcf_ematch_hdr *em_hdr = nla_data(nla);
188 int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr); 190 int data_len = nla_len(nla) - sizeof(*em_hdr);
189 void *data = (void *) em_hdr + sizeof(*em_hdr); 191 void *data = (void *) em_hdr + sizeof(*em_hdr);
190 192
191 if (!TCF_EM_REL_VALID(em_hdr->flags)) 193 if (!TCF_EM_REL_VALID(em_hdr->flags))
@@ -280,15 +282,20 @@ errout:
280 return err; 282 return err;
281} 283}
282 284
285static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
286 [TCA_EMATCH_TREE_HDR] = { .len = sizeof(struct tcf_ematch_tree_hdr) },
287 [TCA_EMATCH_TREE_LIST] = { .type = NLA_NESTED },
288};
289
283/** 290/**
284 * tcf_em_tree_validate - validate ematch config TLV and build ematch tree 291 * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
285 * 292 *
286 * @tp: classifier kind handle 293 * @tp: classifier kind handle
287 * @rta: ematch tree configuration TLV 294 * @nla: ematch tree configuration TLV
288 * @tree: destination ematch tree variable to store the resulting 295 * @tree: destination ematch tree variable to store the resulting
289 * ematch tree. 296 * ematch tree.
290 * 297 *
291 * This function validates the given configuration TLV @rta and builds an 298 * This function validates the given configuration TLV @nla and builds an
292 * ematch tree in @tree. The resulting tree must later be copied into 299 * ematch tree in @tree. The resulting tree must later be copied into
293 * the private classifier data using tcf_em_tree_change(). You MUST NOT 300 * the private classifier data using tcf_em_tree_change(). You MUST NOT
294 * provide the ematch tree variable of the private classifier data directly, 301 * provide the ematch tree variable of the private classifier data directly,
@@ -296,45 +303,43 @@ errout:
296 * 303 *
297 * Returns a negative error code if the configuration TLV contains errors. 304 * Returns a negative error code if the configuration TLV contains errors.
298 */ 305 */
299int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta, 306int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
300 struct tcf_ematch_tree *tree) 307 struct tcf_ematch_tree *tree)
301{ 308{
302 int idx, list_len, matches_len, err = -EINVAL; 309 int idx, list_len, matches_len, err;
303 struct rtattr *tb[TCA_EMATCH_TREE_MAX]; 310 struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
304 struct rtattr *rt_match, *rt_hdr, *rt_list; 311 struct nlattr *rt_match, *rt_hdr, *rt_list;
305 struct tcf_ematch_tree_hdr *tree_hdr; 312 struct tcf_ematch_tree_hdr *tree_hdr;
306 struct tcf_ematch *em; 313 struct tcf_ematch *em;
307 314
308 if (!rta) { 315 if (!nla) {
309 memset(tree, 0, sizeof(*tree)); 316 memset(tree, 0, sizeof(*tree));
310 return 0; 317 return 0;
311 } 318 }
312 319
313 if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0) 320 err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
321 if (err < 0)
314 goto errout; 322 goto errout;
315 323
316 rt_hdr = tb[TCA_EMATCH_TREE_HDR-1]; 324 err = -EINVAL;
317 rt_list = tb[TCA_EMATCH_TREE_LIST-1]; 325 rt_hdr = tb[TCA_EMATCH_TREE_HDR];
326 rt_list = tb[TCA_EMATCH_TREE_LIST];
318 327
319 if (rt_hdr == NULL || rt_list == NULL) 328 if (rt_hdr == NULL || rt_list == NULL)
320 goto errout; 329 goto errout;
321 330
322 if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) || 331 tree_hdr = nla_data(rt_hdr);
323 RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
324 goto errout;
325
326 tree_hdr = RTA_DATA(rt_hdr);
327 memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr)); 332 memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
328 333
329 rt_match = RTA_DATA(rt_list); 334 rt_match = nla_data(rt_list);
330 list_len = RTA_PAYLOAD(rt_list); 335 list_len = nla_len(rt_list);
331 matches_len = tree_hdr->nmatches * sizeof(*em); 336 matches_len = tree_hdr->nmatches * sizeof(*em);
332 337
333 tree->matches = kzalloc(matches_len, GFP_KERNEL); 338 tree->matches = kzalloc(matches_len, GFP_KERNEL);
334 if (tree->matches == NULL) 339 if (tree->matches == NULL)
335 goto errout; 340 goto errout;
336 341
337 /* We do not use rtattr_parse_nested here because the maximum 342 /* We do not use nla_parse_nested here because the maximum
338 * number of attributes is unknown. This saves us the allocation 343 * number of attributes is unknown. This saves us the allocation
339 * for a tb buffer which would serve no purpose at all. 344 * for a tb buffer which would serve no purpose at all.
340 * 345 *
@@ -342,16 +347,16 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
342 * provided, their type must be incremental from 1 to n. Even 347 * provided, their type must be incremental from 1 to n. Even
343 * if it does not serve any real purpose, a failure of sticking 348 * if it does not serve any real purpose, a failure of sticking
344 * to this policy will result in parsing failure. */ 349 * to this policy will result in parsing failure. */
345 for (idx = 0; RTA_OK(rt_match, list_len); idx++) { 350 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
346 err = -EINVAL; 351 err = -EINVAL;
347 352
348 if (rt_match->rta_type != (idx + 1)) 353 if (rt_match->nla_type != (idx + 1))
349 goto errout_abort; 354 goto errout_abort;
350 355
351 if (idx >= tree_hdr->nmatches) 356 if (idx >= tree_hdr->nmatches)
352 goto errout_abort; 357 goto errout_abort;
353 358
354 if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr)) 359 if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
355 goto errout_abort; 360 goto errout_abort;
356 361
357 em = tcf_em_get_match(tree, idx); 362 em = tcf_em_get_match(tree, idx);
@@ -360,7 +365,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
360 if (err < 0) 365 if (err < 0)
361 goto errout_abort; 366 goto errout_abort;
362 367
363 rt_match = RTA_NEXT(rt_match, list_len); 368 rt_match = nla_next(rt_match, &list_len);
364 } 369 }
365 370
366 /* Check if the number of matches provided by userspace actually 371 /* Check if the number of matches provided by userspace actually
@@ -380,6 +385,7 @@ errout_abort:
380 tcf_em_tree_destroy(tp, tree); 385 tcf_em_tree_destroy(tp, tree);
381 return err; 386 return err;
382} 387}
388EXPORT_SYMBOL(tcf_em_tree_validate);
383 389
384/** 390/**
385 * tcf_em_tree_destroy - destroy an ematch tree 391 * tcf_em_tree_destroy - destroy an ematch tree
@@ -413,6 +419,7 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
413 tree->hdr.nmatches = 0; 419 tree->hdr.nmatches = 0;
414 kfree(tree->matches); 420 kfree(tree->matches);
415} 421}
422EXPORT_SYMBOL(tcf_em_tree_destroy);
416 423
417/** 424/**
418 * tcf_em_tree_dump - dump ematch tree into a rtnl message 425 * tcf_em_tree_dump - dump ematch tree into a rtnl message
@@ -430,18 +437,22 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
430{ 437{
431 int i; 438 int i;
432 u8 *tail; 439 u8 *tail;
433 struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb); 440 struct nlattr *top_start;
434 struct rtattr *list_start; 441 struct nlattr *list_start;
442
443 top_start = nla_nest_start(skb, tlv);
444 if (top_start == NULL)
445 goto nla_put_failure;
435 446
436 RTA_PUT(skb, tlv, 0, NULL); 447 NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
437 RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
438 448
439 list_start = (struct rtattr *)skb_tail_pointer(skb); 449 list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
440 RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL); 450 if (list_start == NULL)
451 goto nla_put_failure;
441 452
442 tail = skb_tail_pointer(skb); 453 tail = skb_tail_pointer(skb);
443 for (i = 0; i < tree->hdr.nmatches; i++) { 454 for (i = 0; i < tree->hdr.nmatches; i++) {
444 struct rtattr *match_start = (struct rtattr *)tail; 455 struct nlattr *match_start = (struct nlattr *)tail;
445 struct tcf_ematch *em = tcf_em_get_match(tree, i); 456 struct tcf_ematch *em = tcf_em_get_match(tree, i);
446 struct tcf_ematch_hdr em_hdr = { 457 struct tcf_ematch_hdr em_hdr = {
447 .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER, 458 .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -449,29 +460,30 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 460 .flags = em->flags
450 }; 461 };
451 462
452 RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 463 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
453 464
454 if (em->ops && em->ops->dump) { 465 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 466 if (em->ops->dump(skb, em) < 0)
456 goto rtattr_failure; 467 goto nla_put_failure;
457 } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) { 468 } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
458 u32 u = em->data; 469 u32 u = em->data;
459 RTA_PUT_NOHDR(skb, sizeof(u), &u); 470 nla_put_nohdr(skb, sizeof(u), &u);
460 } else if (em->datalen > 0) 471 } else if (em->datalen > 0)
461 RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data); 472 nla_put_nohdr(skb, em->datalen, (void *) em->data);
462 473
463 tail = skb_tail_pointer(skb); 474 tail = skb_tail_pointer(skb);
464 match_start->rta_len = tail - (u8 *)match_start; 475 match_start->nla_len = tail - (u8 *)match_start;
465 } 476 }
466 477
467 list_start->rta_len = tail - (u8 *)list_start; 478 nla_nest_end(skb, list_start);
468 top_start->rta_len = tail - (u8 *)top_start; 479 nla_nest_end(skb, top_start);
469 480
470 return 0; 481 return 0;
471 482
472rtattr_failure: 483nla_put_failure:
473 return -1; 484 return -1;
474} 485}
486EXPORT_SYMBOL(tcf_em_tree_dump);
475 487
476static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, 488static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
477 struct tcf_pkt_info *info) 489 struct tcf_pkt_info *info)
@@ -529,10 +541,4 @@ stack_overflow:
529 printk("Local stack overflow, increase NET_EMATCH_STACK\n"); 541 printk("Local stack overflow, increase NET_EMATCH_STACK\n");
530 return -1; 542 return -1;
531} 543}
532
533EXPORT_SYMBOL(tcf_em_register);
534EXPORT_SYMBOL(tcf_em_unregister);
535EXPORT_SYMBOL(tcf_em_tree_validate);
536EXPORT_SYMBOL(tcf_em_tree_destroy);
537EXPORT_SYMBOL(tcf_em_tree_dump);
538EXPORT_SYMBOL(__tcf_em_tree_match); 544EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 8ae137e3522b..7e3c048ba9b1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
29#include <linux/hrtimer.h> 29#include <linux/hrtimer.h>
30 30
31#include <net/net_namespace.h> 31#include <net/net_namespace.h>
32#include <net/sock.h>
32#include <net/netlink.h> 33#include <net/netlink.h>
33#include <net/pkt_sched.h> 34#include <net/pkt_sched.h>
34 35
@@ -157,6 +158,7 @@ out:
157 write_unlock(&qdisc_mod_lock); 158 write_unlock(&qdisc_mod_lock);
158 return rc; 159 return rc;
159} 160}
161EXPORT_SYMBOL(register_qdisc);
160 162
161int unregister_qdisc(struct Qdisc_ops *qops) 163int unregister_qdisc(struct Qdisc_ops *qops)
162{ 164{
@@ -175,6 +177,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
175 write_unlock(&qdisc_mod_lock); 177 write_unlock(&qdisc_mod_lock);
176 return err; 178 return err;
177} 179}
180EXPORT_SYMBOL(unregister_qdisc);
178 181
179/* We know handle. Find qdisc among all qdisc's attached to device 182/* We know handle. Find qdisc among all qdisc's attached to device
180 (root qdisc, all its children, children of children etc.) 183 (root qdisc, all its children, children of children etc.)
@@ -195,7 +198,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
195{ 198{
196 unsigned long cl; 199 unsigned long cl;
197 struct Qdisc *leaf; 200 struct Qdisc *leaf;
198 struct Qdisc_class_ops *cops = p->ops->cl_ops; 201 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
199 202
200 if (cops == NULL) 203 if (cops == NULL)
201 return NULL; 204 return NULL;
@@ -210,14 +213,14 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
210 213
211/* Find queueing discipline by name */ 214/* Find queueing discipline by name */
212 215
213static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) 216static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
214{ 217{
215 struct Qdisc_ops *q = NULL; 218 struct Qdisc_ops *q = NULL;
216 219
217 if (kind) { 220 if (kind) {
218 read_lock(&qdisc_mod_lock); 221 read_lock(&qdisc_mod_lock);
219 for (q = qdisc_base; q; q = q->next) { 222 for (q = qdisc_base; q; q = q->next) {
220 if (rtattr_strcmp(kind, q->id) == 0) { 223 if (nla_strcmp(kind, q->id) == 0) {
221 if (!try_module_get(q->owner)) 224 if (!try_module_get(q->owner))
222 q = NULL; 225 q = NULL;
223 break; 226 break;
@@ -230,7 +233,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
230 233
231static struct qdisc_rate_table *qdisc_rtab_list; 234static struct qdisc_rate_table *qdisc_rtab_list;
232 235
233struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab) 236struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
234{ 237{
235 struct qdisc_rate_table *rtab; 238 struct qdisc_rate_table *rtab;
236 239
@@ -241,19 +244,21 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *ta
241 } 244 }
242 } 245 }
243 246
244 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024) 247 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
248 nla_len(tab) != TC_RTAB_SIZE)
245 return NULL; 249 return NULL;
246 250
247 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 251 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
248 if (rtab) { 252 if (rtab) {
249 rtab->rate = *r; 253 rtab->rate = *r;
250 rtab->refcnt = 1; 254 rtab->refcnt = 1;
251 memcpy(rtab->data, RTA_DATA(tab), 1024); 255 memcpy(rtab->data, nla_data(tab), 1024);
252 rtab->next = qdisc_rtab_list; 256 rtab->next = qdisc_rtab_list;
253 qdisc_rtab_list = rtab; 257 qdisc_rtab_list = rtab;
254 } 258 }
255 return rtab; 259 return rtab;
256} 260}
261EXPORT_SYMBOL(qdisc_get_rtab);
257 262
258void qdisc_put_rtab(struct qdisc_rate_table *tab) 263void qdisc_put_rtab(struct qdisc_rate_table *tab)
259{ 264{
@@ -270,6 +275,7 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
270 } 275 }
271 } 276 }
272} 277}
278EXPORT_SYMBOL(qdisc_put_rtab);
273 279
274static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 280static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
275{ 281{
@@ -373,7 +379,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
373 379
374void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 380void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
375{ 381{
376 struct Qdisc_class_ops *cops; 382 const struct Qdisc_class_ops *cops;
377 unsigned long cl; 383 unsigned long cl;
378 u32 parentid; 384 u32 parentid;
379 385
@@ -417,7 +423,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
417 *old = dev_graft_qdisc(dev, new); 423 *old = dev_graft_qdisc(dev, new);
418 } 424 }
419 } else { 425 } else {
420 struct Qdisc_class_ops *cops = parent->ops->cl_ops; 426 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
421 427
422 err = -EINVAL; 428 err = -EINVAL;
423 429
@@ -440,10 +446,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
440 446
441static struct Qdisc * 447static struct Qdisc *
442qdisc_create(struct net_device *dev, u32 parent, u32 handle, 448qdisc_create(struct net_device *dev, u32 parent, u32 handle,
443 struct rtattr **tca, int *errp) 449 struct nlattr **tca, int *errp)
444{ 450{
445 int err; 451 int err;
446 struct rtattr *kind = tca[TCA_KIND-1]; 452 struct nlattr *kind = tca[TCA_KIND];
447 struct Qdisc *sch; 453 struct Qdisc *sch;
448 struct Qdisc_ops *ops; 454 struct Qdisc_ops *ops;
449 455
@@ -451,7 +457,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
451#ifdef CONFIG_KMOD 457#ifdef CONFIG_KMOD
452 if (ops == NULL && kind != NULL) { 458 if (ops == NULL && kind != NULL) {
453 char name[IFNAMSIZ]; 459 char name[IFNAMSIZ];
454 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 460 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
455 /* We dropped the RTNL semaphore in order to 461 /* We dropped the RTNL semaphore in order to
456 * perform the module load. So, even if we 462 * perform the module load. So, even if we
457 * succeeded in loading the module we have to 463 * succeeded in loading the module we have to
@@ -504,11 +510,11 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
504 510
505 sch->handle = handle; 511 sch->handle = handle;
506 512
507 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { 513 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
508 if (tca[TCA_RATE-1]) { 514 if (tca[TCA_RATE]) {
509 err = gen_new_estimator(&sch->bstats, &sch->rate_est, 515 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
510 sch->stats_lock, 516 sch->stats_lock,
511 tca[TCA_RATE-1]); 517 tca[TCA_RATE]);
512 if (err) { 518 if (err) {
513 /* 519 /*
514 * Any broken qdiscs that would require 520 * Any broken qdiscs that would require
@@ -536,20 +542,20 @@ err_out:
536 return NULL; 542 return NULL;
537} 543}
538 544
539static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) 545static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
540{ 546{
541 if (tca[TCA_OPTIONS-1]) { 547 if (tca[TCA_OPTIONS]) {
542 int err; 548 int err;
543 549
544 if (sch->ops->change == NULL) 550 if (sch->ops->change == NULL)
545 return -EINVAL; 551 return -EINVAL;
546 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]); 552 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
547 if (err) 553 if (err)
548 return err; 554 return err;
549 } 555 }
550 if (tca[TCA_RATE-1]) 556 if (tca[TCA_RATE])
551 gen_replace_estimator(&sch->bstats, &sch->rate_est, 557 gen_replace_estimator(&sch->bstats, &sch->rate_est,
552 sch->stats_lock, tca[TCA_RATE-1]); 558 sch->stats_lock, tca[TCA_RATE]);
553 return 0; 559 return 0;
554} 560}
555 561
@@ -581,7 +587,7 @@ static int
581check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) 587check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
582{ 588{
583 struct Qdisc *leaf; 589 struct Qdisc *leaf;
584 struct Qdisc_class_ops *cops = q->ops->cl_ops; 590 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
585 struct check_loop_arg *arg = (struct check_loop_arg *)w; 591 struct check_loop_arg *arg = (struct check_loop_arg *)w;
586 592
587 leaf = cops->leaf(q, cl); 593 leaf = cops->leaf(q, cl);
@@ -599,17 +605,25 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
599 605
600static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 606static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
601{ 607{
608 struct net *net = skb->sk->sk_net;
602 struct tcmsg *tcm = NLMSG_DATA(n); 609 struct tcmsg *tcm = NLMSG_DATA(n);
603 struct rtattr **tca = arg; 610 struct nlattr *tca[TCA_MAX + 1];
604 struct net_device *dev; 611 struct net_device *dev;
605 u32 clid = tcm->tcm_parent; 612 u32 clid = tcm->tcm_parent;
606 struct Qdisc *q = NULL; 613 struct Qdisc *q = NULL;
607 struct Qdisc *p = NULL; 614 struct Qdisc *p = NULL;
608 int err; 615 int err;
609 616
617 if (net != &init_net)
618 return -EINVAL;
619
610 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 620 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
611 return -ENODEV; 621 return -ENODEV;
612 622
623 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
624 if (err < 0)
625 return err;
626
613 if (clid) { 627 if (clid) {
614 if (clid != TC_H_ROOT) { 628 if (clid != TC_H_ROOT) {
615 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 629 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -632,7 +646,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
632 return -ENOENT; 646 return -ENOENT;
633 } 647 }
634 648
635 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 649 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
636 return -EINVAL; 650 return -EINVAL;
637 651
638 if (n->nlmsg_type == RTM_DELQDISC) { 652 if (n->nlmsg_type == RTM_DELQDISC) {
@@ -660,23 +674,30 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
660 674
661static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 675static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
662{ 676{
677 struct net *net = skb->sk->sk_net;
663 struct tcmsg *tcm; 678 struct tcmsg *tcm;
664 struct rtattr **tca; 679 struct nlattr *tca[TCA_MAX + 1];
665 struct net_device *dev; 680 struct net_device *dev;
666 u32 clid; 681 u32 clid;
667 struct Qdisc *q, *p; 682 struct Qdisc *q, *p;
668 int err; 683 int err;
669 684
685 if (net != &init_net)
686 return -EINVAL;
687
670replay: 688replay:
671 /* Reinit, just in case something touches this. */ 689 /* Reinit, just in case something touches this. */
672 tcm = NLMSG_DATA(n); 690 tcm = NLMSG_DATA(n);
673 tca = arg;
674 clid = tcm->tcm_parent; 691 clid = tcm->tcm_parent;
675 q = p = NULL; 692 q = p = NULL;
676 693
677 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 694 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
678 return -ENODEV; 695 return -ENODEV;
679 696
697 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
698 if (err < 0)
699 return err;
700
680 if (clid) { 701 if (clid) {
681 if (clid != TC_H_ROOT) { 702 if (clid != TC_H_ROOT) {
682 if (clid != TC_H_INGRESS) { 703 if (clid != TC_H_INGRESS) {
@@ -704,7 +725,7 @@ replay:
704 goto create_n_graft; 725 goto create_n_graft;
705 if (n->nlmsg_flags&NLM_F_EXCL) 726 if (n->nlmsg_flags&NLM_F_EXCL)
706 return -EEXIST; 727 return -EEXIST;
707 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 728 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
708 return -EINVAL; 729 return -EINVAL;
709 if (q == p || 730 if (q == p ||
710 (p && check_loop(q, p, 0))) 731 (p && check_loop(q, p, 0)))
@@ -737,8 +758,8 @@ replay:
737 if ((n->nlmsg_flags&NLM_F_CREATE) && 758 if ((n->nlmsg_flags&NLM_F_CREATE) &&
738 (n->nlmsg_flags&NLM_F_REPLACE) && 759 (n->nlmsg_flags&NLM_F_REPLACE) &&
739 ((n->nlmsg_flags&NLM_F_EXCL) || 760 ((n->nlmsg_flags&NLM_F_EXCL) ||
740 (tca[TCA_KIND-1] && 761 (tca[TCA_KIND] &&
741 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)))) 762 nla_strcmp(tca[TCA_KIND], q->ops->id))))
742 goto create_n_graft; 763 goto create_n_graft;
743 } 764 }
744 } 765 }
@@ -753,7 +774,7 @@ replay:
753 return -ENOENT; 774 return -ENOENT;
754 if (n->nlmsg_flags&NLM_F_EXCL) 775 if (n->nlmsg_flags&NLM_F_EXCL)
755 return -EEXIST; 776 return -EEXIST;
756 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 777 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
757 return -EINVAL; 778 return -EINVAL;
758 err = qdisc_change(q, tca); 779 err = qdisc_change(q, tca);
759 if (err == 0) 780 if (err == 0)
@@ -814,31 +835,31 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
814 tcm->tcm_parent = clid; 835 tcm->tcm_parent = clid;
815 tcm->tcm_handle = q->handle; 836 tcm->tcm_handle = q->handle;
816 tcm->tcm_info = atomic_read(&q->refcnt); 837 tcm->tcm_info = atomic_read(&q->refcnt);
817 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); 838 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
818 if (q->ops->dump && q->ops->dump(q, skb) < 0) 839 if (q->ops->dump && q->ops->dump(q, skb) < 0)
819 goto rtattr_failure; 840 goto nla_put_failure;
820 q->qstats.qlen = q->q.qlen; 841 q->qstats.qlen = q->q.qlen;
821 842
822 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 843 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
823 TCA_XSTATS, q->stats_lock, &d) < 0) 844 TCA_XSTATS, q->stats_lock, &d) < 0)
824 goto rtattr_failure; 845 goto nla_put_failure;
825 846
826 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 847 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
827 goto rtattr_failure; 848 goto nla_put_failure;
828 849
829 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || 850 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
830 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || 851 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
831 gnet_stats_copy_queue(&d, &q->qstats) < 0) 852 gnet_stats_copy_queue(&d, &q->qstats) < 0)
832 goto rtattr_failure; 853 goto nla_put_failure;
833 854
834 if (gnet_stats_finish_copy(&d) < 0) 855 if (gnet_stats_finish_copy(&d) < 0)
835 goto rtattr_failure; 856 goto nla_put_failure;
836 857
837 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 858 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
838 return skb->len; 859 return skb->len;
839 860
840nlmsg_failure: 861nlmsg_failure:
841rtattr_failure: 862nla_put_failure:
842 nlmsg_trim(skb, b); 863 nlmsg_trim(skb, b);
843 return -1; 864 return -1;
844} 865}
@@ -863,7 +884,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
863 } 884 }
864 885
865 if (skb->len) 886 if (skb->len)
866 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 887 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
867 888
868err_out: 889err_out:
869 kfree_skb(skb); 890 kfree_skb(skb);
@@ -872,11 +893,15 @@ err_out:
872 893
873static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 894static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
874{ 895{
896 struct net *net = skb->sk->sk_net;
875 int idx, q_idx; 897 int idx, q_idx;
876 int s_idx, s_q_idx; 898 int s_idx, s_q_idx;
877 struct net_device *dev; 899 struct net_device *dev;
878 struct Qdisc *q; 900 struct Qdisc *q;
879 901
902 if (net != &init_net)
903 return 0;
904
880 s_idx = cb->args[0]; 905 s_idx = cb->args[0];
881 s_q_idx = q_idx = cb->args[1]; 906 s_q_idx = q_idx = cb->args[1];
882 read_lock(&dev_base_lock); 907 read_lock(&dev_base_lock);
@@ -920,11 +945,12 @@ done:
920 945
921static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 946static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
922{ 947{
948 struct net *net = skb->sk->sk_net;
923 struct tcmsg *tcm = NLMSG_DATA(n); 949 struct tcmsg *tcm = NLMSG_DATA(n);
924 struct rtattr **tca = arg; 950 struct nlattr *tca[TCA_MAX + 1];
925 struct net_device *dev; 951 struct net_device *dev;
926 struct Qdisc *q = NULL; 952 struct Qdisc *q = NULL;
927 struct Qdisc_class_ops *cops; 953 const struct Qdisc_class_ops *cops;
928 unsigned long cl = 0; 954 unsigned long cl = 0;
929 unsigned long new_cl; 955 unsigned long new_cl;
930 u32 pid = tcm->tcm_parent; 956 u32 pid = tcm->tcm_parent;
@@ -932,9 +958,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
932 u32 qid = TC_H_MAJ(clid); 958 u32 qid = TC_H_MAJ(clid);
933 int err; 959 int err;
934 960
961 if (net != &init_net)
962 return -EINVAL;
963
935 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 964 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
936 return -ENODEV; 965 return -ENODEV;
937 966
967 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
968 if (err < 0)
969 return err;
970
938 /* 971 /*
939 parent == TC_H_UNSPEC - unspecified parent. 972 parent == TC_H_UNSPEC - unspecified parent.
940 parent == TC_H_ROOT - class is root, which has no parent. 973 parent == TC_H_ROOT - class is root, which has no parent.
@@ -1039,7 +1072,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1039 struct nlmsghdr *nlh; 1072 struct nlmsghdr *nlh;
1040 unsigned char *b = skb_tail_pointer(skb); 1073 unsigned char *b = skb_tail_pointer(skb);
1041 struct gnet_dump d; 1074 struct gnet_dump d;
1042 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1075 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1043 1076
1044 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1077 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1045 tcm = NLMSG_DATA(nlh); 1078 tcm = NLMSG_DATA(nlh);
@@ -1048,25 +1081,25 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1048 tcm->tcm_parent = q->handle; 1081 tcm->tcm_parent = q->handle;
1049 tcm->tcm_handle = q->handle; 1082 tcm->tcm_handle = q->handle;
1050 tcm->tcm_info = 0; 1083 tcm->tcm_info = 0;
1051 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); 1084 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1052 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) 1085 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1053 goto rtattr_failure; 1086 goto nla_put_failure;
1054 1087
1055 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 1088 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1056 TCA_XSTATS, q->stats_lock, &d) < 0) 1089 TCA_XSTATS, q->stats_lock, &d) < 0)
1057 goto rtattr_failure; 1090 goto nla_put_failure;
1058 1091
1059 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1092 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1060 goto rtattr_failure; 1093 goto nla_put_failure;
1061 1094
1062 if (gnet_stats_finish_copy(&d) < 0) 1095 if (gnet_stats_finish_copy(&d) < 0)
1063 goto rtattr_failure; 1096 goto nla_put_failure;
1064 1097
1065 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1098 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1066 return skb->len; 1099 return skb->len;
1067 1100
1068nlmsg_failure: 1101nlmsg_failure:
1069rtattr_failure: 1102nla_put_failure:
1070 nlmsg_trim(skb, b); 1103 nlmsg_trim(skb, b);
1071 return -1; 1104 return -1;
1072} 1105}
@@ -1086,7 +1119,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1086 return -EINVAL; 1119 return -EINVAL;
1087 } 1120 }
1088 1121
1089 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1122 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1090} 1123}
1091 1124
1092struct qdisc_dump_args 1125struct qdisc_dump_args
@@ -1106,6 +1139,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
1106 1139
1107static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 1140static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1108{ 1141{
1142 struct net *net = skb->sk->sk_net;
1109 int t; 1143 int t;
1110 int s_t; 1144 int s_t;
1111 struct net_device *dev; 1145 struct net_device *dev;
@@ -1113,6 +1147,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1113 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 1147 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1114 struct qdisc_dump_args arg; 1148 struct qdisc_dump_args arg;
1115 1149
1150 if (net != &init_net)
1151 return 0;
1152
1116 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1153 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1117 return 0; 1154 return 0;
1118 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1155 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
@@ -1268,8 +1305,3 @@ static int __init pktsched_init(void)
1268} 1305}
1269 1306
1270subsys_initcall(pktsched_init); 1307subsys_initcall(pktsched_init);
1271
1272EXPORT_SYMBOL(qdisc_get_rtab);
1273EXPORT_SYMBOL(qdisc_put_rtab);
1274EXPORT_SYMBOL(register_qdisc);
1275EXPORT_SYMBOL(unregister_qdisc);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ddc4f2c54379..335273416384 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -16,18 +16,6 @@
16 16
17extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ 17extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
18 18
19#if 0 /* control */
20#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
21#else
22#define DPRINTK(format,args...)
23#endif
24
25#if 0 /* data */
26#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
27#else
28#define D2PRINTK(format,args...)
29#endif
30
31/* 19/*
32 * The ATM queuing discipline provides a framework for invoking classifiers 20 * The ATM queuing discipline provides a framework for invoking classifiers
33 * (aka "filters"), which in turn select classes of this queuing discipline. 21 * (aka "filters"), which in turn select classes of this queuing discipline.
@@ -49,7 +37,6 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
49 * - should lock the flow while there is data in the queue (?) 37 * - should lock the flow while there is data in the queue (?)
50 */ 38 */
51 39
52#define PRIV(sch) qdisc_priv(sch)
53#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) 40#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
54 41
55struct atm_flow_data { 42struct atm_flow_data {
@@ -57,7 +44,7 @@ struct atm_flow_data {
57 struct tcf_proto *filter_list; 44 struct tcf_proto *filter_list;
58 struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ 45 struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
59 void (*old_pop)(struct atm_vcc *vcc, 46 void (*old_pop)(struct atm_vcc *vcc,
60 struct sk_buff * skb); /* chaining */ 47 struct sk_buff *skb); /* chaining */
61 struct atm_qdisc_data *parent; /* parent qdisc */ 48 struct atm_qdisc_data *parent; /* parent qdisc */
62 struct socket *sock; /* for closing */ 49 struct socket *sock; /* for closing */
63 u32 classid; /* x:y type ID */ 50 u32 classid; /* x:y type ID */
@@ -84,17 +71,17 @@ static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
84{ 71{
85 struct atm_flow_data *walk; 72 struct atm_flow_data *walk;
86 73
87 DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow); 74 pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
88 for (walk = qdisc->flows; walk; walk = walk->next) 75 for (walk = qdisc->flows; walk; walk = walk->next)
89 if (walk == flow) 76 if (walk == flow)
90 return 1; 77 return 1;
91 DPRINTK("find_flow: not found\n"); 78 pr_debug("find_flow: not found\n");
92 return 0; 79 return 0;
93} 80}
94 81
95static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) 82static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
96{ 83{
97 struct atm_qdisc_data *p = PRIV(sch); 84 struct atm_qdisc_data *p = qdisc_priv(sch);
98 struct atm_flow_data *flow; 85 struct atm_flow_data *flow;
99 86
100 for (flow = p->flows; flow; flow = flow->next) 87 for (flow = p->flows; flow; flow = flow->next)
@@ -106,10 +93,10 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
106static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, 93static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
107 struct Qdisc *new, struct Qdisc **old) 94 struct Qdisc *new, struct Qdisc **old)
108{ 95{
109 struct atm_qdisc_data *p = PRIV(sch); 96 struct atm_qdisc_data *p = qdisc_priv(sch);
110 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 97 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
111 98
112 DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", 99 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
113 sch, p, flow, new, old); 100 sch, p, flow, new, old);
114 if (!find_flow(p, flow)) 101 if (!find_flow(p, flow))
115 return -EINVAL; 102 return -EINVAL;
@@ -125,20 +112,20 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
125{ 112{
126 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 113 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
127 114
128 DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); 115 pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
129 return flow ? flow->q : NULL; 116 return flow ? flow->q : NULL;
130} 117}
131 118
132static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) 119static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
133{ 120{
134 struct atm_qdisc_data *p __maybe_unused = PRIV(sch); 121 struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
135 struct atm_flow_data *flow; 122 struct atm_flow_data *flow;
136 123
137 DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); 124 pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
138 flow = lookup_flow(sch, classid); 125 flow = lookup_flow(sch, classid);
139 if (flow) 126 if (flow)
140 flow->ref++; 127 flow->ref++;
141 DPRINTK("atm_tc_get: flow %p\n", flow); 128 pr_debug("atm_tc_get: flow %p\n", flow);
142 return (unsigned long)flow; 129 return (unsigned long)flow;
143} 130}
144 131
@@ -155,14 +142,14 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
155 */ 142 */
156static void atm_tc_put(struct Qdisc *sch, unsigned long cl) 143static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
157{ 144{
158 struct atm_qdisc_data *p = PRIV(sch); 145 struct atm_qdisc_data *p = qdisc_priv(sch);
159 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 146 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
160 struct atm_flow_data **prev; 147 struct atm_flow_data **prev;
161 148
162 DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 149 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
163 if (--flow->ref) 150 if (--flow->ref)
164 return; 151 return;
165 DPRINTK("atm_tc_put: destroying\n"); 152 pr_debug("atm_tc_put: destroying\n");
166 for (prev = &p->flows; *prev; prev = &(*prev)->next) 153 for (prev = &p->flows; *prev; prev = &(*prev)->next)
167 if (*prev == flow) 154 if (*prev == flow)
168 break; 155 break;
@@ -171,11 +158,11 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
171 return; 158 return;
172 } 159 }
173 *prev = flow->next; 160 *prev = flow->next;
174 DPRINTK("atm_tc_put: qdisc %p\n", flow->q); 161 pr_debug("atm_tc_put: qdisc %p\n", flow->q);
175 qdisc_destroy(flow->q); 162 qdisc_destroy(flow->q);
176 tcf_destroy_chain(flow->filter_list); 163 tcf_destroy_chain(flow->filter_list);
177 if (flow->sock) { 164 if (flow->sock) {
178 DPRINTK("atm_tc_put: f_count %d\n", 165 pr_debug("atm_tc_put: f_count %d\n",
179 file_count(flow->sock->file)); 166 file_count(flow->sock->file));
180 flow->vcc->pop = flow->old_pop; 167 flow->vcc->pop = flow->old_pop;
181 sockfd_put(flow->sock); 168 sockfd_put(flow->sock);
@@ -194,7 +181,7 @@ static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
194{ 181{
195 struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; 182 struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
196 183
197 D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); 184 pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
198 VCC2FLOW(vcc)->old_pop(vcc, skb); 185 VCC2FLOW(vcc)->old_pop(vcc, skb);
199 tasklet_schedule(&p->task); 186 tasklet_schedule(&p->task);
200} 187}
@@ -208,19 +195,24 @@ static const u8 llc_oui_ip[] = {
208 0x08, 0x00 195 0x08, 0x00
209}; /* Ethertype IP (0800) */ 196}; /* Ethertype IP (0800) */
210 197
198static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
199 [TCA_ATM_FD] = { .type = NLA_U32 },
200 [TCA_ATM_EXCESS] = { .type = NLA_U32 },
201};
202
211static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, 203static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
212 struct rtattr **tca, unsigned long *arg) 204 struct nlattr **tca, unsigned long *arg)
213{ 205{
214 struct atm_qdisc_data *p = PRIV(sch); 206 struct atm_qdisc_data *p = qdisc_priv(sch);
215 struct atm_flow_data *flow = (struct atm_flow_data *)*arg; 207 struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
216 struct atm_flow_data *excess = NULL; 208 struct atm_flow_data *excess = NULL;
217 struct rtattr *opt = tca[TCA_OPTIONS - 1]; 209 struct nlattr *opt = tca[TCA_OPTIONS];
218 struct rtattr *tb[TCA_ATM_MAX]; 210 struct nlattr *tb[TCA_ATM_MAX + 1];
219 struct socket *sock; 211 struct socket *sock;
220 int fd, error, hdr_len; 212 int fd, error, hdr_len;
221 void *hdr; 213 void *hdr;
222 214
223 DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," 215 pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
224 "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); 216 "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
225 /* 217 /*
226 * The concept of parents doesn't apply for this qdisc. 218 * The concept of parents doesn't apply for this qdisc.
@@ -236,34 +228,38 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
236 */ 228 */
237 if (flow) 229 if (flow)
238 return -EBUSY; 230 return -EBUSY;
239 if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt)) 231 if (opt == NULL)
240 return -EINVAL; 232 return -EINVAL;
241 if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd)) 233
234 error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
235 if (error < 0)
236 return error;
237
238 if (!tb[TCA_ATM_FD])
242 return -EINVAL; 239 return -EINVAL;
243 fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]); 240 fd = nla_get_u32(tb[TCA_ATM_FD]);
244 DPRINTK("atm_tc_change: fd %d\n", fd); 241 pr_debug("atm_tc_change: fd %d\n", fd);
245 if (tb[TCA_ATM_HDR - 1]) { 242 if (tb[TCA_ATM_HDR]) {
246 hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]); 243 hdr_len = nla_len(tb[TCA_ATM_HDR]);
247 hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]); 244 hdr = nla_data(tb[TCA_ATM_HDR]);
248 } else { 245 } else {
249 hdr_len = RFC1483LLC_LEN; 246 hdr_len = RFC1483LLC_LEN;
250 hdr = NULL; /* default LLC/SNAP for IP */ 247 hdr = NULL; /* default LLC/SNAP for IP */
251 } 248 }
252 if (!tb[TCA_ATM_EXCESS - 1]) 249 if (!tb[TCA_ATM_EXCESS])
253 excess = NULL; 250 excess = NULL;
254 else { 251 else {
255 if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32))
256 return -EINVAL;
257 excess = (struct atm_flow_data *) 252 excess = (struct atm_flow_data *)
258 atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1])); 253 atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
259 if (!excess) 254 if (!excess)
260 return -ENOENT; 255 return -ENOENT;
261 } 256 }
262 DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n", 257 pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
263 opt->rta_type, RTA_PAYLOAD(opt), hdr_len); 258 opt->nla_type, nla_len(opt), hdr_len);
264 if (!(sock = sockfd_lookup(fd, &error))) 259 sock = sockfd_lookup(fd, &error);
260 if (!sock)
265 return error; /* f_count++ */ 261 return error; /* f_count++ */
266 DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file)); 262 pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file));
267 if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { 263 if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
268 error = -EPROTOTYPE; 264 error = -EPROTOTYPE;
269 goto err_out; 265 goto err_out;
@@ -272,7 +268,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
272 on vcc->send */ 268 on vcc->send */
273 if (classid) { 269 if (classid) {
274 if (TC_H_MAJ(classid ^ sch->handle)) { 270 if (TC_H_MAJ(classid ^ sch->handle)) {
275 DPRINTK("atm_tc_change: classid mismatch\n"); 271 pr_debug("atm_tc_change: classid mismatch\n");
276 error = -EINVAL; 272 error = -EINVAL;
277 goto err_out; 273 goto err_out;
278 } 274 }
@@ -286,26 +282,28 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
286 282
287 for (i = 1; i < 0x8000; i++) { 283 for (i = 1; i < 0x8000; i++) {
288 classid = TC_H_MAKE(sch->handle, 0x8000 | i); 284 classid = TC_H_MAKE(sch->handle, 0x8000 | i);
289 if (!(cl = atm_tc_get(sch, classid))) 285 cl = atm_tc_get(sch, classid);
286 if (!cl)
290 break; 287 break;
291 atm_tc_put(sch, cl); 288 atm_tc_put(sch, cl);
292 } 289 }
293 } 290 }
294 DPRINTK("atm_tc_change: new id %x\n", classid); 291 pr_debug("atm_tc_change: new id %x\n", classid);
295 flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); 292 flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
296 DPRINTK("atm_tc_change: flow %p\n", flow); 293 pr_debug("atm_tc_change: flow %p\n", flow);
297 if (!flow) { 294 if (!flow) {
298 error = -ENOBUFS; 295 error = -ENOBUFS;
299 goto err_out; 296 goto err_out;
300 } 297 }
301 flow->filter_list = NULL; 298 flow->filter_list = NULL;
302 if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) 299 flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
300 if (!flow->q)
303 flow->q = &noop_qdisc; 301 flow->q = &noop_qdisc;
304 DPRINTK("atm_tc_change: qdisc %p\n", flow->q); 302 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
305 flow->sock = sock; 303 flow->sock = sock;
306 flow->vcc = ATM_SD(sock); /* speedup */ 304 flow->vcc = ATM_SD(sock); /* speedup */
307 flow->vcc->user_back = flow; 305 flow->vcc->user_back = flow;
308 DPRINTK("atm_tc_change: vcc %p\n", flow->vcc); 306 pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
309 flow->old_pop = flow->vcc->pop; 307 flow->old_pop = flow->vcc->pop;
310 flow->parent = p; 308 flow->parent = p;
311 flow->vcc->pop = sch_atm_pop; 309 flow->vcc->pop = sch_atm_pop;
@@ -330,11 +328,11 @@ err_out:
330 328
331static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) 329static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
332{ 330{
333 struct atm_qdisc_data *p = PRIV(sch); 331 struct atm_qdisc_data *p = qdisc_priv(sch);
334 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 332 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
335 333
336 DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 334 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
337 if (!find_flow(PRIV(sch), flow)) 335 if (!find_flow(qdisc_priv(sch), flow))
338 return -EINVAL; 336 return -EINVAL;
339 if (flow->filter_list || flow == &p->link) 337 if (flow->filter_list || flow == &p->link)
340 return -EBUSY; 338 return -EBUSY;
@@ -354,10 +352,10 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
354 352
355static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) 353static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
356{ 354{
357 struct atm_qdisc_data *p = PRIV(sch); 355 struct atm_qdisc_data *p = qdisc_priv(sch);
358 struct atm_flow_data *flow; 356 struct atm_flow_data *flow;
359 357
360 DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 358 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
361 if (walker->stop) 359 if (walker->stop)
362 return; 360 return;
363 for (flow = p->flows; flow; flow = flow->next) { 361 for (flow = p->flows; flow; flow = flow->next) {
@@ -372,10 +370,10 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
372 370
373static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) 371static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
374{ 372{
375 struct atm_qdisc_data *p = PRIV(sch); 373 struct atm_qdisc_data *p = qdisc_priv(sch);
376 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 374 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
377 375
378 DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 376 pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
379 return flow ? &flow->filter_list : &p->link.filter_list; 377 return flow ? &flow->filter_list : &p->link.filter_list;
380} 378}
381 379
@@ -383,13 +381,13 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
383 381
384static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 382static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
385{ 383{
386 struct atm_qdisc_data *p = PRIV(sch); 384 struct atm_qdisc_data *p = qdisc_priv(sch);
387 struct atm_flow_data *flow = NULL; /* @@@ */ 385 struct atm_flow_data *flow = NULL; /* @@@ */
388 struct tcf_result res; 386 struct tcf_result res;
389 int result; 387 int result;
390 int ret = NET_XMIT_POLICED; 388 int ret = NET_XMIT_POLICED;
391 389
392 D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 390 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
393 result = TC_POLICE_OK; /* be nice to gcc */ 391 result = TC_POLICE_OK; /* be nice to gcc */
394 if (TC_H_MAJ(skb->priority) != sch->handle || 392 if (TC_H_MAJ(skb->priority) != sch->handle ||
395 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) 393 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
@@ -430,7 +428,8 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
430#endif 428#endif
431 } 429 }
432 430
433 if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { 431 ret = flow->q->enqueue(skb, flow->q);
432 if (ret != 0) {
434drop: __maybe_unused 433drop: __maybe_unused
435 sch->qstats.drops++; 434 sch->qstats.drops++;
436 if (flow) 435 if (flow)
@@ -468,11 +467,11 @@ drop: __maybe_unused
468static void sch_atm_dequeue(unsigned long data) 467static void sch_atm_dequeue(unsigned long data)
469{ 468{
470 struct Qdisc *sch = (struct Qdisc *)data; 469 struct Qdisc *sch = (struct Qdisc *)data;
471 struct atm_qdisc_data *p = PRIV(sch); 470 struct atm_qdisc_data *p = qdisc_priv(sch);
472 struct atm_flow_data *flow; 471 struct atm_flow_data *flow;
473 struct sk_buff *skb; 472 struct sk_buff *skb;
474 473
475 D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); 474 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
476 for (flow = p->link.next; flow; flow = flow->next) 475 for (flow = p->link.next; flow; flow = flow->next)
477 /* 476 /*
478 * If traffic is properly shaped, this won't generate nasty 477 * If traffic is properly shaped, this won't generate nasty
@@ -483,7 +482,7 @@ static void sch_atm_dequeue(unsigned long data)
483 (void)flow->q->ops->requeue(skb, flow->q); 482 (void)flow->q->ops->requeue(skb, flow->q);
484 break; 483 break;
485 } 484 }
486 D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow); 485 pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
487 /* remove any LL header somebody else has attached */ 486 /* remove any LL header somebody else has attached */
488 skb_pull(skb, skb_network_offset(skb)); 487 skb_pull(skb, skb_network_offset(skb));
489 if (skb_headroom(skb) < flow->hdr_len) { 488 if (skb_headroom(skb) < flow->hdr_len) {
@@ -495,7 +494,7 @@ static void sch_atm_dequeue(unsigned long data)
495 continue; 494 continue;
496 skb = new; 495 skb = new;
497 } 496 }
498 D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", 497 pr_debug("sch_atm_dequeue: ip %p, data %p\n",
499 skb_network_header(skb), skb->data); 498 skb_network_header(skb), skb->data);
500 ATM_SKB(skb)->vcc = flow->vcc; 499 ATM_SKB(skb)->vcc = flow->vcc;
501 memcpy(skb_push(skb, flow->hdr_len), flow->hdr, 500 memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
@@ -509,10 +508,10 @@ static void sch_atm_dequeue(unsigned long data)
509 508
510static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) 509static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
511{ 510{
512 struct atm_qdisc_data *p = PRIV(sch); 511 struct atm_qdisc_data *p = qdisc_priv(sch);
513 struct sk_buff *skb; 512 struct sk_buff *skb;
514 513
515 D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); 514 pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
516 tasklet_schedule(&p->task); 515 tasklet_schedule(&p->task);
517 skb = p->link.q->dequeue(p->link.q); 516 skb = p->link.q->dequeue(p->link.q);
518 if (skb) 517 if (skb)
@@ -522,10 +521,10 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
522 521
523static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) 522static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
524{ 523{
525 struct atm_qdisc_data *p = PRIV(sch); 524 struct atm_qdisc_data *p = qdisc_priv(sch);
526 int ret; 525 int ret;
527 526
528 D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 527 pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
529 ret = p->link.q->ops->requeue(skb, p->link.q); 528 ret = p->link.q->ops->requeue(skb, p->link.q);
530 if (!ret) { 529 if (!ret) {
531 sch->q.qlen++; 530 sch->q.qlen++;
@@ -539,27 +538,27 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
539 538
540static unsigned int atm_tc_drop(struct Qdisc *sch) 539static unsigned int atm_tc_drop(struct Qdisc *sch)
541{ 540{
542 struct atm_qdisc_data *p = PRIV(sch); 541 struct atm_qdisc_data *p = qdisc_priv(sch);
543 struct atm_flow_data *flow; 542 struct atm_flow_data *flow;
544 unsigned int len; 543 unsigned int len;
545 544
546 DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); 545 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
547 for (flow = p->flows; flow; flow = flow->next) 546 for (flow = p->flows; flow; flow = flow->next)
548 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) 547 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
549 return len; 548 return len;
550 return 0; 549 return 0;
551} 550}
552 551
553static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt) 552static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
554{ 553{
555 struct atm_qdisc_data *p = PRIV(sch); 554 struct atm_qdisc_data *p = qdisc_priv(sch);
556 555
557 DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 556 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
558 p->flows = &p->link; 557 p->flows = &p->link;
559 if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, 558 p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
560 sch->handle))) 559 if (!p->link.q)
561 p->link.q = &noop_qdisc; 560 p->link.q = &noop_qdisc;
562 DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); 561 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
563 p->link.filter_list = NULL; 562 p->link.filter_list = NULL;
564 p->link.vcc = NULL; 563 p->link.vcc = NULL;
565 p->link.sock = NULL; 564 p->link.sock = NULL;
@@ -572,10 +571,10 @@ static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt)
572 571
573static void atm_tc_reset(struct Qdisc *sch) 572static void atm_tc_reset(struct Qdisc *sch)
574{ 573{
575 struct atm_qdisc_data *p = PRIV(sch); 574 struct atm_qdisc_data *p = qdisc_priv(sch);
576 struct atm_flow_data *flow; 575 struct atm_flow_data *flow;
577 576
578 DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); 577 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
579 for (flow = p->flows; flow; flow = flow->next) 578 for (flow = p->flows; flow; flow = flow->next)
580 qdisc_reset(flow->q); 579 qdisc_reset(flow->q);
581 sch->q.qlen = 0; 580 sch->q.qlen = 0;
@@ -583,10 +582,10 @@ static void atm_tc_reset(struct Qdisc *sch)
583 582
584static void atm_tc_destroy(struct Qdisc *sch) 583static void atm_tc_destroy(struct Qdisc *sch)
585{ 584{
586 struct atm_qdisc_data *p = PRIV(sch); 585 struct atm_qdisc_data *p = qdisc_priv(sch);
587 struct atm_flow_data *flow; 586 struct atm_flow_data *flow;
588 587
589 DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); 588 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
590 /* races ? */ 589 /* races ? */
591 while ((flow = p->flows)) { 590 while ((flow = p->flows)) {
592 tcf_destroy_chain(flow->filter_list); 591 tcf_destroy_chain(flow->filter_list);
@@ -608,20 +607,22 @@ static void atm_tc_destroy(struct Qdisc *sch)
608static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, 607static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
609 struct sk_buff *skb, struct tcmsg *tcm) 608 struct sk_buff *skb, struct tcmsg *tcm)
610{ 609{
611 struct atm_qdisc_data *p = PRIV(sch); 610 struct atm_qdisc_data *p = qdisc_priv(sch);
612 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 611 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
613 unsigned char *b = skb_tail_pointer(skb); 612 struct nlattr *nest;
614 struct rtattr *rta;
615 613
616 DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", 614 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
617 sch, p, flow, skb, tcm); 615 sch, p, flow, skb, tcm);
618 if (!find_flow(p, flow)) 616 if (!find_flow(p, flow))
619 return -EINVAL; 617 return -EINVAL;
620 tcm->tcm_handle = flow->classid; 618 tcm->tcm_handle = flow->classid;
621 tcm->tcm_info = flow->q->handle; 619 tcm->tcm_info = flow->q->handle;
622 rta = (struct rtattr *)b; 620
623 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 621 nest = nla_nest_start(skb, TCA_OPTIONS);
624 RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr); 622 if (nest == NULL)
623 goto nla_put_failure;
624
625 NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
625 if (flow->vcc) { 626 if (flow->vcc) {
626 struct sockaddr_atmpvc pvc; 627 struct sockaddr_atmpvc pvc;
627 int state; 628 int state;
@@ -630,22 +631,21 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
630 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; 631 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
631 pvc.sap_addr.vpi = flow->vcc->vpi; 632 pvc.sap_addr.vpi = flow->vcc->vpi;
632 pvc.sap_addr.vci = flow->vcc->vci; 633 pvc.sap_addr.vci = flow->vcc->vci;
633 RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc); 634 NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
634 state = ATM_VF2VS(flow->vcc->flags); 635 state = ATM_VF2VS(flow->vcc->flags);
635 RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state); 636 NLA_PUT_U32(skb, TCA_ATM_STATE, state);
636 } 637 }
637 if (flow->excess) 638 if (flow->excess)
638 RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid); 639 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
639 else { 640 else {
640 static u32 zero; 641 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
641
642 RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
643 } 642 }
644 rta->rta_len = skb_tail_pointer(skb) - b; 643
644 nla_nest_end(skb, nest);
645 return skb->len; 645 return skb->len;
646 646
647rtattr_failure: 647nla_put_failure:
648 nlmsg_trim(skb, b); 648 nla_nest_cancel(skb, nest);
649 return -1; 649 return -1;
650} 650}
651static int 651static int
@@ -668,7 +668,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
668 return 0; 668 return 0;
669} 669}
670 670
671static struct Qdisc_class_ops atm_class_ops = { 671static const struct Qdisc_class_ops atm_class_ops = {
672 .graft = atm_tc_graft, 672 .graft = atm_tc_graft,
673 .leaf = atm_tc_leaf, 673 .leaf = atm_tc_leaf,
674 .get = atm_tc_get, 674 .get = atm_tc_get,
@@ -683,7 +683,7 @@ static struct Qdisc_class_ops atm_class_ops = {
683 .dump_stats = atm_tc_dump_class_stats, 683 .dump_stats = atm_tc_dump_class_stats,
684}; 684};
685 685
686static struct Qdisc_ops atm_qdisc_ops = { 686static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
687 .cl_ops = &atm_class_ops, 687 .cl_ops = &atm_class_ops,
688 .id = "atm", 688 .id = "atm",
689 .priv_size = sizeof(struct atm_qdisc_data), 689 .priv_size = sizeof(struct atm_qdisc_data),
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index f914fc43a124..507fb488bc98 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -28,7 +28,7 @@ static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
28 return NULL; 28 return NULL;
29} 29}
30 30
31static struct Qdisc_ops blackhole_qdisc_ops = { 31static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
32 .id = "blackhole", 32 .id = "blackhole",
33 .priv_size = 0, 33 .priv_size = 0,
34 .enqueue = blackhole_enqueue, 34 .enqueue = blackhole_enqueue,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4de3744e65c3..09969c1fbc08 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1377,24 +1377,33 @@ static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1377 return 0; 1377 return 0;
1378} 1378}
1379 1379
1380static int cbq_init(struct Qdisc *sch, struct rtattr *opt) 1380static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1381 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1382 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1383 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1384 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1385 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1386 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1387 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1388};
1389
1390static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1381{ 1391{
1382 struct cbq_sched_data *q = qdisc_priv(sch); 1392 struct cbq_sched_data *q = qdisc_priv(sch);
1383 struct rtattr *tb[TCA_CBQ_MAX]; 1393 struct nlattr *tb[TCA_CBQ_MAX + 1];
1384 struct tc_ratespec *r; 1394 struct tc_ratespec *r;
1395 int err;
1385 1396
1386 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 || 1397 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1387 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || 1398 if (err < 0)
1388 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec)) 1399 return err;
1389 return -EINVAL;
1390 1400
1391 if (tb[TCA_CBQ_LSSOPT-1] && 1401 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1392 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1393 return -EINVAL; 1402 return -EINVAL;
1394 1403
1395 r = RTA_DATA(tb[TCA_CBQ_RATE-1]); 1404 r = nla_data(tb[TCA_CBQ_RATE]);
1396 1405
1397 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) 1406 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1398 return -EINVAL; 1407 return -EINVAL;
1399 1408
1400 q->link.refcnt = 1; 1409 q->link.refcnt = 1;
@@ -1427,8 +1436,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1427 1436
1428 cbq_link_class(&q->link); 1437 cbq_link_class(&q->link);
1429 1438
1430 if (tb[TCA_CBQ_LSSOPT-1]) 1439 if (tb[TCA_CBQ_LSSOPT])
1431 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1440 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1432 1441
1433 cbq_addprio(q, &q->link); 1442 cbq_addprio(q, &q->link);
1434 return 0; 1443 return 0;
@@ -1438,10 +1447,10 @@ static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1438{ 1447{
1439 unsigned char *b = skb_tail_pointer(skb); 1448 unsigned char *b = skb_tail_pointer(skb);
1440 1449
1441 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate); 1450 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1442 return skb->len; 1451 return skb->len;
1443 1452
1444rtattr_failure: 1453nla_put_failure:
1445 nlmsg_trim(skb, b); 1454 nlmsg_trim(skb, b);
1446 return -1; 1455 return -1;
1447} 1456}
@@ -1463,10 +1472,10 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1463 opt.minidle = (u32)(-cl->minidle); 1472 opt.minidle = (u32)(-cl->minidle);
1464 opt.offtime = cl->offtime; 1473 opt.offtime = cl->offtime;
1465 opt.change = ~0; 1474 opt.change = ~0;
1466 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt); 1475 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1467 return skb->len; 1476 return skb->len;
1468 1477
1469rtattr_failure: 1478nla_put_failure:
1470 nlmsg_trim(skb, b); 1479 nlmsg_trim(skb, b);
1471 return -1; 1480 return -1;
1472} 1481}
@@ -1481,10 +1490,10 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1481 opt.priority = cl->priority+1; 1490 opt.priority = cl->priority+1;
1482 opt.cpriority = cl->cpriority+1; 1491 opt.cpriority = cl->cpriority+1;
1483 opt.weight = cl->weight; 1492 opt.weight = cl->weight;
1484 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); 1493 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1485 return skb->len; 1494 return skb->len;
1486 1495
1487rtattr_failure: 1496nla_put_failure:
1488 nlmsg_trim(skb, b); 1497 nlmsg_trim(skb, b);
1489 return -1; 1498 return -1;
1490} 1499}
@@ -1498,10 +1507,10 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1498 opt.priority2 = cl->priority2+1; 1507 opt.priority2 = cl->priority2+1;
1499 opt.pad = 0; 1508 opt.pad = 0;
1500 opt.penalty = cl->penalty; 1509 opt.penalty = cl->penalty;
1501 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); 1510 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1502 return skb->len; 1511 return skb->len;
1503 1512
1504rtattr_failure: 1513nla_put_failure:
1505 nlmsg_trim(skb, b); 1514 nlmsg_trim(skb, b);
1506 return -1; 1515 return -1;
1507} 1516}
@@ -1515,11 +1524,11 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1515 opt.split = cl->split ? cl->split->classid : 0; 1524 opt.split = cl->split ? cl->split->classid : 0;
1516 opt.defmap = cl->defmap; 1525 opt.defmap = cl->defmap;
1517 opt.defchange = ~0; 1526 opt.defchange = ~0;
1518 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); 1527 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1519 } 1528 }
1520 return skb->len; 1529 return skb->len;
1521 1530
1522rtattr_failure: 1531nla_put_failure:
1523 nlmsg_trim(skb, b); 1532 nlmsg_trim(skb, b);
1524 return -1; 1533 return -1;
1525} 1534}
@@ -1534,11 +1543,11 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1534 opt.police = cl->police; 1543 opt.police = cl->police;
1535 opt.__res1 = 0; 1544 opt.__res1 = 0;
1536 opt.__res2 = 0; 1545 opt.__res2 = 0;
1537 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt); 1546 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1538 } 1547 }
1539 return skb->len; 1548 return skb->len;
1540 1549
1541rtattr_failure: 1550nla_put_failure:
1542 nlmsg_trim(skb, b); 1551 nlmsg_trim(skb, b);
1543 return -1; 1552 return -1;
1544} 1553}
@@ -1561,18 +1570,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1561static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) 1570static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1562{ 1571{
1563 struct cbq_sched_data *q = qdisc_priv(sch); 1572 struct cbq_sched_data *q = qdisc_priv(sch);
1564 unsigned char *b = skb_tail_pointer(skb); 1573 struct nlattr *nest;
1565 struct rtattr *rta;
1566 1574
1567 rta = (struct rtattr*)b; 1575 nest = nla_nest_start(skb, TCA_OPTIONS);
1568 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1576 if (nest == NULL)
1577 goto nla_put_failure;
1569 if (cbq_dump_attr(skb, &q->link) < 0) 1578 if (cbq_dump_attr(skb, &q->link) < 0)
1570 goto rtattr_failure; 1579 goto nla_put_failure;
1571 rta->rta_len = skb_tail_pointer(skb) - b; 1580 nla_nest_end(skb, nest);
1572 return skb->len; 1581 return skb->len;
1573 1582
1574rtattr_failure: 1583nla_put_failure:
1575 nlmsg_trim(skb, b); 1584 nla_nest_cancel(skb, nest);
1576 return -1; 1585 return -1;
1577} 1586}
1578 1587
@@ -1590,8 +1599,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1590 struct sk_buff *skb, struct tcmsg *tcm) 1599 struct sk_buff *skb, struct tcmsg *tcm)
1591{ 1600{
1592 struct cbq_class *cl = (struct cbq_class*)arg; 1601 struct cbq_class *cl = (struct cbq_class*)arg;
1593 unsigned char *b = skb_tail_pointer(skb); 1602 struct nlattr *nest;
1594 struct rtattr *rta;
1595 1603
1596 if (cl->tparent) 1604 if (cl->tparent)
1597 tcm->tcm_parent = cl->tparent->classid; 1605 tcm->tcm_parent = cl->tparent->classid;
@@ -1600,15 +1608,16 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1600 tcm->tcm_handle = cl->classid; 1608 tcm->tcm_handle = cl->classid;
1601 tcm->tcm_info = cl->q->handle; 1609 tcm->tcm_info = cl->q->handle;
1602 1610
1603 rta = (struct rtattr*)b; 1611 nest = nla_nest_start(skb, TCA_OPTIONS);
1604 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1612 if (nest == NULL)
1613 goto nla_put_failure;
1605 if (cbq_dump_attr(skb, cl) < 0) 1614 if (cbq_dump_attr(skb, cl) < 0)
1606 goto rtattr_failure; 1615 goto nla_put_failure;
1607 rta->rta_len = skb_tail_pointer(skb) - b; 1616 nla_nest_end(skb, nest);
1608 return skb->len; 1617 return skb->len;
1609 1618
1610rtattr_failure: 1619nla_put_failure:
1611 nlmsg_trim(skb, b); 1620 nla_nest_cancel(skb, nest);
1612 return -1; 1621 return -1;
1613} 1622}
1614 1623
@@ -1753,45 +1762,23 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753} 1762}
1754 1763
1755static int 1764static int
1756cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, 1765cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1757 unsigned long *arg) 1766 unsigned long *arg)
1758{ 1767{
1759 int err; 1768 int err;
1760 struct cbq_sched_data *q = qdisc_priv(sch); 1769 struct cbq_sched_data *q = qdisc_priv(sch);
1761 struct cbq_class *cl = (struct cbq_class*)*arg; 1770 struct cbq_class *cl = (struct cbq_class*)*arg;
1762 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1771 struct nlattr *opt = tca[TCA_OPTIONS];
1763 struct rtattr *tb[TCA_CBQ_MAX]; 1772 struct nlattr *tb[TCA_CBQ_MAX + 1];
1764 struct cbq_class *parent; 1773 struct cbq_class *parent;
1765 struct qdisc_rate_table *rtab = NULL; 1774 struct qdisc_rate_table *rtab = NULL;
1766 1775
1767 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt)) 1776 if (opt == NULL)
1768 return -EINVAL; 1777 return -EINVAL;
1769 1778
1770 if (tb[TCA_CBQ_OVL_STRATEGY-1] && 1779 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1771 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl)) 1780 if (err < 0)
1772 return -EINVAL; 1781 return err;
1773
1774 if (tb[TCA_CBQ_FOPT-1] &&
1775 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1776 return -EINVAL;
1777
1778 if (tb[TCA_CBQ_RATE-1] &&
1779 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1780 return -EINVAL;
1781
1782 if (tb[TCA_CBQ_LSSOPT-1] &&
1783 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1784 return -EINVAL;
1785
1786 if (tb[TCA_CBQ_WRROPT-1] &&
1787 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1788 return -EINVAL;
1789
1790#ifdef CONFIG_NET_CLS_ACT
1791 if (tb[TCA_CBQ_POLICE-1] &&
1792 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1793 return -EINVAL;
1794#endif
1795 1782
1796 if (cl) { 1783 if (cl) {
1797 /* Check parent */ 1784 /* Check parent */
@@ -1802,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1802 return -EINVAL; 1789 return -EINVAL;
1803 } 1790 }
1804 1791
1805 if (tb[TCA_CBQ_RATE-1]) { 1792 if (tb[TCA_CBQ_RATE]) {
1806 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); 1793 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1807 if (rtab == NULL) 1794 if (rtab == NULL)
1808 return -EINVAL; 1795 return -EINVAL;
1809 } 1796 }
@@ -1819,45 +1806,45 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1819 qdisc_put_rtab(rtab); 1806 qdisc_put_rtab(rtab);
1820 } 1807 }
1821 1808
1822 if (tb[TCA_CBQ_LSSOPT-1]) 1809 if (tb[TCA_CBQ_LSSOPT])
1823 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1810 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1824 1811
1825 if (tb[TCA_CBQ_WRROPT-1]) { 1812 if (tb[TCA_CBQ_WRROPT]) {
1826 cbq_rmprio(q, cl); 1813 cbq_rmprio(q, cl);
1827 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); 1814 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1828 } 1815 }
1829 1816
1830 if (tb[TCA_CBQ_OVL_STRATEGY-1]) 1817 if (tb[TCA_CBQ_OVL_STRATEGY])
1831 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); 1818 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1832 1819
1833#ifdef CONFIG_NET_CLS_ACT 1820#ifdef CONFIG_NET_CLS_ACT
1834 if (tb[TCA_CBQ_POLICE-1]) 1821 if (tb[TCA_CBQ_POLICE])
1835 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); 1822 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1836#endif 1823#endif
1837 1824
1838 if (tb[TCA_CBQ_FOPT-1]) 1825 if (tb[TCA_CBQ_FOPT])
1839 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); 1826 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1840 1827
1841 if (cl->q->q.qlen) 1828 if (cl->q->q.qlen)
1842 cbq_activate_class(cl); 1829 cbq_activate_class(cl);
1843 1830
1844 sch_tree_unlock(sch); 1831 sch_tree_unlock(sch);
1845 1832
1846 if (tca[TCA_RATE-1]) 1833 if (tca[TCA_RATE])
1847 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1834 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1848 &sch->dev->queue_lock, 1835 &sch->dev->queue_lock,
1849 tca[TCA_RATE-1]); 1836 tca[TCA_RATE]);
1850 return 0; 1837 return 0;
1851 } 1838 }
1852 1839
1853 if (parentid == TC_H_ROOT) 1840 if (parentid == TC_H_ROOT)
1854 return -EINVAL; 1841 return -EINVAL;
1855 1842
1856 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || 1843 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1857 tb[TCA_CBQ_LSSOPT-1] == NULL) 1844 tb[TCA_CBQ_LSSOPT] == NULL)
1858 return -EINVAL; 1845 return -EINVAL;
1859 1846
1860 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); 1847 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1861 if (rtab == NULL) 1848 if (rtab == NULL)
1862 return -EINVAL; 1849 return -EINVAL;
1863 1850
@@ -1912,8 +1899,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1912 cl->share = cl->tparent; 1899 cl->share = cl->tparent;
1913 cbq_adjust_levels(parent); 1900 cbq_adjust_levels(parent);
1914 cl->minidle = -0x7FFFFFFF; 1901 cl->minidle = -0x7FFFFFFF;
1915 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1902 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1916 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); 1903 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1917 if (cl->ewma_log==0) 1904 if (cl->ewma_log==0)
1918 cl->ewma_log = q->link.ewma_log; 1905 cl->ewma_log = q->link.ewma_log;
1919 if (cl->maxidle==0) 1906 if (cl->maxidle==0)
@@ -1921,19 +1908,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1921 if (cl->avpkt==0) 1908 if (cl->avpkt==0)
1922 cl->avpkt = q->link.avpkt; 1909 cl->avpkt = q->link.avpkt;
1923 cl->overlimit = cbq_ovl_classic; 1910 cl->overlimit = cbq_ovl_classic;
1924 if (tb[TCA_CBQ_OVL_STRATEGY-1]) 1911 if (tb[TCA_CBQ_OVL_STRATEGY])
1925 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); 1912 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1926#ifdef CONFIG_NET_CLS_ACT 1913#ifdef CONFIG_NET_CLS_ACT
1927 if (tb[TCA_CBQ_POLICE-1]) 1914 if (tb[TCA_CBQ_POLICE])
1928 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); 1915 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1929#endif 1916#endif
1930 if (tb[TCA_CBQ_FOPT-1]) 1917 if (tb[TCA_CBQ_FOPT])
1931 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); 1918 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1932 sch_tree_unlock(sch); 1919 sch_tree_unlock(sch);
1933 1920
1934 if (tca[TCA_RATE-1]) 1921 if (tca[TCA_RATE])
1935 gen_new_estimator(&cl->bstats, &cl->rate_est, 1922 gen_new_estimator(&cl->bstats, &cl->rate_est,
1936 &sch->dev->queue_lock, tca[TCA_RATE-1]); 1923 &sch->dev->queue_lock, tca[TCA_RATE]);
1937 1924
1938 *arg = (unsigned long)cl; 1925 *arg = (unsigned long)cl;
1939 return 0; 1926 return 0;
@@ -2045,7 +2032,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2045 } 2032 }
2046} 2033}
2047 2034
2048static struct Qdisc_class_ops cbq_class_ops = { 2035static const struct Qdisc_class_ops cbq_class_ops = {
2049 .graft = cbq_graft, 2036 .graft = cbq_graft,
2050 .leaf = cbq_leaf, 2037 .leaf = cbq_leaf,
2051 .qlen_notify = cbq_qlen_notify, 2038 .qlen_notify = cbq_qlen_notify,
@@ -2061,7 +2048,7 @@ static struct Qdisc_class_ops cbq_class_ops = {
2061 .dump_stats = cbq_dump_class_stats, 2048 .dump_stats = cbq_dump_class_stats,
2062}; 2049};
2063 2050
2064static struct Qdisc_ops cbq_qdisc_ops = { 2051static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2065 .next = NULL, 2052 .next = NULL,
2066 .cl_ops = &cbq_class_ops, 2053 .cl_ops = &cbq_class_ops,
2067 .id = "cbq", 2054 .id = "cbq",
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 60f89199e3da..0df911fd67b1 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -10,28 +10,12 @@
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/rtnetlink.h> 12#include <linux/rtnetlink.h>
13#include <linux/bitops.h>
13#include <net/pkt_sched.h> 14#include <net/pkt_sched.h>
14#include <net/dsfield.h> 15#include <net/dsfield.h>
15#include <net/inet_ecn.h> 16#include <net/inet_ecn.h>
16#include <asm/byteorder.h> 17#include <asm/byteorder.h>
17 18
18
19#if 0 /* control */
20#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
21#else
22#define DPRINTK(format,args...)
23#endif
24
25#if 0 /* data */
26#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
27#else
28#define D2PRINTK(format,args...)
29#endif
30
31
32#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
33
34
35/* 19/*
36 * classid class marking 20 * classid class marking
37 * ------- ----- ------- 21 * ------- ----- -------
@@ -60,17 +44,6 @@ struct dsmark_qdisc_data {
60 int set_tc_index; 44 int set_tc_index;
61}; 45};
62 46
63static inline int dsmark_valid_indices(u16 indices)
64{
65 while (indices != 1) {
66 if (indices & 1)
67 return 0;
68 indices >>= 1;
69 }
70
71 return 1;
72}
73
74static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) 47static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
75{ 48{
76 return (index <= p->indices && index > 0); 49 return (index <= p->indices && index > 0);
@@ -81,9 +54,9 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
81static int dsmark_graft(struct Qdisc *sch, unsigned long arg, 54static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
82 struct Qdisc *new, struct Qdisc **old) 55 struct Qdisc *new, struct Qdisc **old)
83{ 56{
84 struct dsmark_qdisc_data *p = PRIV(sch); 57 struct dsmark_qdisc_data *p = qdisc_priv(sch);
85 58
86 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", 59 pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
87 sch, p, new, old); 60 sch, p, new, old);
88 61
89 if (new == NULL) { 62 if (new == NULL) {
@@ -104,13 +77,14 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
104 77
105static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 78static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
106{ 79{
107 return PRIV(sch)->q; 80 struct dsmark_qdisc_data *p = qdisc_priv(sch);
81 return p->q;
108} 82}
109 83
110static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) 84static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
111{ 85{
112 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n", 86 pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
113 sch, PRIV(sch), classid); 87 sch, qdisc_priv(sch), classid);
114 88
115 return TC_H_MIN(classid) + 1; 89 return TC_H_MIN(classid) + 1;
116} 90}
@@ -125,44 +99,56 @@ static void dsmark_put(struct Qdisc *sch, unsigned long cl)
125{ 99{
126} 100}
127 101
102static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
103 [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
104 [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
105 [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
106 [TCA_DSMARK_MASK] = { .type = NLA_U8 },
107 [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
108};
109
128static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 110static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
129 struct rtattr **tca, unsigned long *arg) 111 struct nlattr **tca, unsigned long *arg)
130{ 112{
131 struct dsmark_qdisc_data *p = PRIV(sch); 113 struct dsmark_qdisc_data *p = qdisc_priv(sch);
132 struct rtattr *opt = tca[TCA_OPTIONS-1]; 114 struct nlattr *opt = tca[TCA_OPTIONS];
133 struct rtattr *tb[TCA_DSMARK_MAX]; 115 struct nlattr *tb[TCA_DSMARK_MAX + 1];
134 int err = -EINVAL; 116 int err = -EINVAL;
135 u8 mask = 0; 117 u8 mask = 0;
136 118
137 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 119 pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
138 "arg 0x%lx\n", sch, p, classid, parent, *arg); 120 "arg 0x%lx\n", sch, p, classid, parent, *arg);
139 121
140 if (!dsmark_valid_index(p, *arg)) { 122 if (!dsmark_valid_index(p, *arg)) {
141 err = -ENOENT; 123 err = -ENOENT;
142 goto rtattr_failure; 124 goto errout;
143 } 125 }
144 126
145 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 127 if (!opt)
146 goto rtattr_failure; 128 goto errout;
147 129
148 if (tb[TCA_DSMARK_MASK-1]) 130 err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
149 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]); 131 if (err < 0)
132 goto errout;
133
134 if (tb[TCA_DSMARK_MASK])
135 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
150 136
151 if (tb[TCA_DSMARK_VALUE-1]) 137 if (tb[TCA_DSMARK_VALUE])
152 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]); 138 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
153 139
154 if (tb[TCA_DSMARK_MASK-1]) 140 if (tb[TCA_DSMARK_MASK])
155 p->mask[*arg-1] = mask; 141 p->mask[*arg-1] = mask;
156 142
157 err = 0; 143 err = 0;
158 144
159rtattr_failure: 145errout:
160 return err; 146 return err;
161} 147}
162 148
163static int dsmark_delete(struct Qdisc *sch, unsigned long arg) 149static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
164{ 150{
165 struct dsmark_qdisc_data *p = PRIV(sch); 151 struct dsmark_qdisc_data *p = qdisc_priv(sch);
166 152
167 if (!dsmark_valid_index(p, arg)) 153 if (!dsmark_valid_index(p, arg))
168 return -EINVAL; 154 return -EINVAL;
@@ -173,12 +159,12 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
173 return 0; 159 return 0;
174} 160}
175 161
176static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 162static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
177{ 163{
178 struct dsmark_qdisc_data *p = PRIV(sch); 164 struct dsmark_qdisc_data *p = qdisc_priv(sch);
179 int i; 165 int i;
180 166
181 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 167 pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
182 168
183 if (walker->stop) 169 if (walker->stop)
184 return; 170 return;
@@ -197,34 +183,42 @@ ignore:
197 } 183 }
198} 184}
199 185
200static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 186static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
187 unsigned long cl)
201{ 188{
202 return &PRIV(sch)->filter_list; 189 struct dsmark_qdisc_data *p = qdisc_priv(sch);
190 return &p->filter_list;
203} 191}
204 192
205/* --------------------------- Qdisc operations ---------------------------- */ 193/* --------------------------- Qdisc operations ---------------------------- */
206 194
207static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 195static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
208{ 196{
209 struct dsmark_qdisc_data *p = PRIV(sch); 197 struct dsmark_qdisc_data *p = qdisc_priv(sch);
210 int err; 198 int err;
211 199
212 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 200 pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
213 201
214 if (p->set_tc_index) { 202 if (p->set_tc_index) {
215 /* FIXME: Safe with non-linear skbs? --RR */
216 switch (skb->protocol) { 203 switch (skb->protocol) {
217 case __constant_htons(ETH_P_IP): 204 case __constant_htons(ETH_P_IP):
218 skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) 205 if (skb_cow_head(skb, sizeof(struct iphdr)))
219 & ~INET_ECN_MASK; 206 goto drop;
220 break; 207
221 case __constant_htons(ETH_P_IPV6): 208 skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
222 skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) 209 & ~INET_ECN_MASK;
223 & ~INET_ECN_MASK; 210 break;
224 break; 211
225 default: 212 case __constant_htons(ETH_P_IPV6):
226 skb->tc_index = 0; 213 if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
227 break; 214 goto drop;
215
216 skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
217 & ~INET_ECN_MASK;
218 break;
219 default:
220 skb->tc_index = 0;
221 break;
228 } 222 }
229 } 223 }
230 224
@@ -234,7 +228,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
234 struct tcf_result res; 228 struct tcf_result res;
235 int result = tc_classify(skb, p->filter_list, &res); 229 int result = tc_classify(skb, p->filter_list, &res);
236 230
237 D2PRINTK("result %d class 0x%04x\n", result, res.classid); 231 pr_debug("result %d class 0x%04x\n", result, res.classid);
238 232
239 switch (result) { 233 switch (result) {
240#ifdef CONFIG_NET_CLS_ACT 234#ifdef CONFIG_NET_CLS_ACT
@@ -242,14 +236,14 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
242 case TC_ACT_STOLEN: 236 case TC_ACT_STOLEN:
243 kfree_skb(skb); 237 kfree_skb(skb);
244 return NET_XMIT_SUCCESS; 238 return NET_XMIT_SUCCESS;
239
245 case TC_ACT_SHOT: 240 case TC_ACT_SHOT:
246 kfree_skb(skb); 241 goto drop;
247 sch->qstats.drops++;
248 return NET_XMIT_BYPASS;
249#endif 242#endif
250 case TC_ACT_OK: 243 case TC_ACT_OK:
251 skb->tc_index = TC_H_MIN(res.classid); 244 skb->tc_index = TC_H_MIN(res.classid);
252 break; 245 break;
246
253 default: 247 default:
254 if (p->default_index != NO_DEFAULT_INDEX) 248 if (p->default_index != NO_DEFAULT_INDEX)
255 skb->tc_index = p->default_index; 249 skb->tc_index = p->default_index;
@@ -257,7 +251,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
257 } 251 }
258 } 252 }
259 253
260 err = p->q->enqueue(skb,p->q); 254 err = p->q->enqueue(skb, p->q);
261 if (err != NET_XMIT_SUCCESS) { 255 if (err != NET_XMIT_SUCCESS) {
262 sch->qstats.drops++; 256 sch->qstats.drops++;
263 return err; 257 return err;
@@ -268,15 +262,20 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
268 sch->q.qlen++; 262 sch->q.qlen++;
269 263
270 return NET_XMIT_SUCCESS; 264 return NET_XMIT_SUCCESS;
265
266drop:
267 kfree_skb(skb);
268 sch->qstats.drops++;
269 return NET_XMIT_BYPASS;
271} 270}
272 271
273static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 272static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
274{ 273{
275 struct dsmark_qdisc_data *p = PRIV(sch); 274 struct dsmark_qdisc_data *p = qdisc_priv(sch);
276 struct sk_buff *skb; 275 struct sk_buff *skb;
277 u32 index; 276 u32 index;
278 277
279 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); 278 pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
280 279
281 skb = p->q->ops->dequeue(p->q); 280 skb = p->q->ops->dequeue(p->q);
282 if (skb == NULL) 281 if (skb == NULL)
@@ -285,39 +284,39 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
285 sch->q.qlen--; 284 sch->q.qlen--;
286 285
287 index = skb->tc_index & (p->indices - 1); 286 index = skb->tc_index & (p->indices - 1);
288 D2PRINTK("index %d->%d\n", skb->tc_index, index); 287 pr_debug("index %d->%d\n", skb->tc_index, index);
289 288
290 switch (skb->protocol) { 289 switch (skb->protocol) {
291 case __constant_htons(ETH_P_IP): 290 case __constant_htons(ETH_P_IP):
292 ipv4_change_dsfield(ip_hdr(skb), p->mask[index], 291 ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
293 p->value[index]); 292 p->value[index]);
294 break;
295 case __constant_htons(ETH_P_IPV6):
296 ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
297 p->value[index]);
298 break; 293 break;
299 default: 294 case __constant_htons(ETH_P_IPV6):
300 /* 295 ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
301 * Only complain if a change was actually attempted. 296 p->value[index]);
302 * This way, we can send non-IP traffic through dsmark
303 * and don't need yet another qdisc as a bypass.
304 */
305 if (p->mask[index] != 0xff || p->value[index])
306 printk(KERN_WARNING "dsmark_dequeue: "
307 "unsupported protocol %d\n",
308 ntohs(skb->protocol));
309 break; 297 break;
298 default:
299 /*
300 * Only complain if a change was actually attempted.
301 * This way, we can send non-IP traffic through dsmark
302 * and don't need yet another qdisc as a bypass.
303 */
304 if (p->mask[index] != 0xff || p->value[index])
305 printk(KERN_WARNING
306 "dsmark_dequeue: unsupported protocol %d\n",
307 ntohs(skb->protocol));
308 break;
310 } 309 }
311 310
312 return skb; 311 return skb;
313} 312}
314 313
315static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 314static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
316{ 315{
317 struct dsmark_qdisc_data *p = PRIV(sch); 316 struct dsmark_qdisc_data *p = qdisc_priv(sch);
318 int err; 317 int err;
319 318
320 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 319 pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
321 320
322 err = p->q->ops->requeue(skb, p->q); 321 err = p->q->ops->requeue(skb, p->q);
323 if (err != NET_XMIT_SUCCESS) { 322 if (err != NET_XMIT_SUCCESS) {
@@ -333,10 +332,10 @@ static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
333 332
334static unsigned int dsmark_drop(struct Qdisc *sch) 333static unsigned int dsmark_drop(struct Qdisc *sch)
335{ 334{
336 struct dsmark_qdisc_data *p = PRIV(sch); 335 struct dsmark_qdisc_data *p = qdisc_priv(sch);
337 unsigned int len; 336 unsigned int len;
338 337
339 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); 338 pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
340 339
341 if (p->q->ops->drop == NULL) 340 if (p->q->ops->drop == NULL)
342 return 0; 341 return 0;
@@ -348,26 +347,32 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
348 return len; 347 return len;
349} 348}
350 349
351static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) 350static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
352{ 351{
353 struct dsmark_qdisc_data *p = PRIV(sch); 352 struct dsmark_qdisc_data *p = qdisc_priv(sch);
354 struct rtattr *tb[TCA_DSMARK_MAX]; 353 struct nlattr *tb[TCA_DSMARK_MAX + 1];
355 int err = -EINVAL; 354 int err = -EINVAL;
356 u32 default_index = NO_DEFAULT_INDEX; 355 u32 default_index = NO_DEFAULT_INDEX;
357 u16 indices; 356 u16 indices;
358 u8 *mask; 357 u8 *mask;
359 358
360 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 359 pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
361 360
362 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0) 361 if (!opt)
363 goto errout; 362 goto errout;
364 363
365 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]); 364 err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
366 if (!indices || !dsmark_valid_indices(indices)) 365 if (err < 0)
366 goto errout;
367
368 err = -EINVAL;
369 indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
370
371 if (hweight32(indices) != 1)
367 goto errout; 372 goto errout;
368 373
369 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) 374 if (tb[TCA_DSMARK_DEFAULT_INDEX])
370 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 375 default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
371 376
372 mask = kmalloc(indices * 2, GFP_KERNEL); 377 mask = kmalloc(indices * 2, GFP_KERNEL);
373 if (mask == NULL) { 378 if (mask == NULL) {
@@ -383,34 +388,33 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
383 388
384 p->indices = indices; 389 p->indices = indices;
385 p->default_index = default_index; 390 p->default_index = default_index;
386 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); 391 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
387 392
388 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); 393 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 394 if (p->q == NULL)
390 p->q = &noop_qdisc; 395 p->q = &noop_qdisc;
391 396
392 DPRINTK("dsmark_init: qdisc %p\n", p->q); 397 pr_debug("dsmark_init: qdisc %p\n", p->q);
393 398
394 err = 0; 399 err = 0;
395errout: 400errout:
396rtattr_failure:
397 return err; 401 return err;
398} 402}
399 403
400static void dsmark_reset(struct Qdisc *sch) 404static void dsmark_reset(struct Qdisc *sch)
401{ 405{
402 struct dsmark_qdisc_data *p = PRIV(sch); 406 struct dsmark_qdisc_data *p = qdisc_priv(sch);
403 407
404 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); 408 pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
405 qdisc_reset(p->q); 409 qdisc_reset(p->q);
406 sch->q.qlen = 0; 410 sch->q.qlen = 0;
407} 411}
408 412
409static void dsmark_destroy(struct Qdisc *sch) 413static void dsmark_destroy(struct Qdisc *sch)
410{ 414{
411 struct dsmark_qdisc_data *p = PRIV(sch); 415 struct dsmark_qdisc_data *p = qdisc_priv(sch);
412 416
413 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); 417 pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
414 418
415 tcf_destroy_chain(p->filter_list); 419 tcf_destroy_chain(p->filter_list);
416 qdisc_destroy(p->q); 420 qdisc_destroy(p->q);
@@ -420,10 +424,10 @@ static void dsmark_destroy(struct Qdisc *sch)
420static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 424static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
421 struct sk_buff *skb, struct tcmsg *tcm) 425 struct sk_buff *skb, struct tcmsg *tcm)
422{ 426{
423 struct dsmark_qdisc_data *p = PRIV(sch); 427 struct dsmark_qdisc_data *p = qdisc_priv(sch);
424 struct rtattr *opts = NULL; 428 struct nlattr *opts = NULL;
425 429
426 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); 430 pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
427 431
428 if (!dsmark_valid_index(p, cl)) 432 if (!dsmark_valid_index(p, cl))
429 return -EINVAL; 433 return -EINVAL;
@@ -431,37 +435,41 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
431 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 435 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
432 tcm->tcm_info = p->q->handle; 436 tcm->tcm_info = p->q->handle;
433 437
434 opts = RTA_NEST(skb, TCA_OPTIONS); 438 opts = nla_nest_start(skb, TCA_OPTIONS);
435 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]); 439 if (opts == NULL)
436 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]); 440 goto nla_put_failure;
441 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
442 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
437 443
438 return RTA_NEST_END(skb, opts); 444 return nla_nest_end(skb, opts);
439 445
440rtattr_failure: 446nla_put_failure:
441 return RTA_NEST_CANCEL(skb, opts); 447 return nla_nest_cancel(skb, opts);
442} 448}
443 449
444static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 450static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
445{ 451{
446 struct dsmark_qdisc_data *p = PRIV(sch); 452 struct dsmark_qdisc_data *p = qdisc_priv(sch);
447 struct rtattr *opts = NULL; 453 struct nlattr *opts = NULL;
448 454
449 opts = RTA_NEST(skb, TCA_OPTIONS); 455 opts = nla_nest_start(skb, TCA_OPTIONS);
450 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices); 456 if (opts == NULL)
457 goto nla_put_failure;
458 NLA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
451 459
452 if (p->default_index != NO_DEFAULT_INDEX) 460 if (p->default_index != NO_DEFAULT_INDEX)
453 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index); 461 NLA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
454 462
455 if (p->set_tc_index) 463 if (p->set_tc_index)
456 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX); 464 NLA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
457 465
458 return RTA_NEST_END(skb, opts); 466 return nla_nest_end(skb, opts);
459 467
460rtattr_failure: 468nla_put_failure:
461 return RTA_NEST_CANCEL(skb, opts); 469 return nla_nest_cancel(skb, opts);
462} 470}
463 471
464static struct Qdisc_class_ops dsmark_class_ops = { 472static const struct Qdisc_class_ops dsmark_class_ops = {
465 .graft = dsmark_graft, 473 .graft = dsmark_graft,
466 .leaf = dsmark_leaf, 474 .leaf = dsmark_leaf,
467 .get = dsmark_get, 475 .get = dsmark_get,
@@ -475,7 +483,7 @@ static struct Qdisc_class_ops dsmark_class_ops = {
475 .dump = dsmark_dump_class, 483 .dump = dsmark_dump_class,
476}; 484};
477 485
478static struct Qdisc_ops dsmark_qdisc_ops = { 486static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
479 .next = NULL, 487 .next = NULL,
480 .cl_ops = &dsmark_class_ops, 488 .cl_ops = &dsmark_class_ops,
481 .id = "dsmark", 489 .id = "dsmark",
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c264308f17c1..95ed48221652 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -43,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
43 return qdisc_reshape_fail(skb, sch); 43 return qdisc_reshape_fail(skb, sch);
44} 44}
45 45
46static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 46static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
47{ 47{
48 struct fifo_sched_data *q = qdisc_priv(sch); 48 struct fifo_sched_data *q = qdisc_priv(sch);
49 49
@@ -55,9 +55,9 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
55 55
56 q->limit = limit; 56 q->limit = limit;
57 } else { 57 } else {
58 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 58 struct tc_fifo_qopt *ctl = nla_data(opt);
59 59
60 if (RTA_PAYLOAD(opt) < sizeof(*ctl)) 60 if (nla_len(opt) < sizeof(*ctl))
61 return -EINVAL; 61 return -EINVAL;
62 62
63 q->limit = ctl->limit; 63 q->limit = ctl->limit;
@@ -71,14 +71,14 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
71 struct fifo_sched_data *q = qdisc_priv(sch); 71 struct fifo_sched_data *q = qdisc_priv(sch);
72 struct tc_fifo_qopt opt = { .limit = q->limit }; 72 struct tc_fifo_qopt opt = { .limit = q->limit };
73 73
74 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 74 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
75 return skb->len; 75 return skb->len;
76 76
77rtattr_failure: 77nla_put_failure:
78 return -1; 78 return -1;
79} 79}
80 80
81struct Qdisc_ops pfifo_qdisc_ops = { 81struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
82 .id = "pfifo", 82 .id = "pfifo",
83 .priv_size = sizeof(struct fifo_sched_data), 83 .priv_size = sizeof(struct fifo_sched_data),
84 .enqueue = pfifo_enqueue, 84 .enqueue = pfifo_enqueue,
@@ -91,8 +91,9 @@ struct Qdisc_ops pfifo_qdisc_ops = {
91 .dump = fifo_dump, 91 .dump = fifo_dump,
92 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
93}; 93};
94EXPORT_SYMBOL(pfifo_qdisc_ops);
94 95
95struct Qdisc_ops bfifo_qdisc_ops = { 96struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
96 .id = "bfifo", 97 .id = "bfifo",
97 .priv_size = sizeof(struct fifo_sched_data), 98 .priv_size = sizeof(struct fifo_sched_data),
98 .enqueue = bfifo_enqueue, 99 .enqueue = bfifo_enqueue,
@@ -105,6 +106,4 @@ struct Qdisc_ops bfifo_qdisc_ops = {
105 .dump = fifo_dump, 106 .dump = fifo_dump,
106 .owner = THIS_MODULE, 107 .owner = THIS_MODULE,
107}; 108};
108
109EXPORT_SYMBOL(bfifo_qdisc_ops); 109EXPORT_SYMBOL(bfifo_qdisc_ops);
110EXPORT_SYMBOL(pfifo_qdisc_ops);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fa1a6f45dc41..10b5c0887fff 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -40,16 +40,22 @@
40 */ 40 */
41 41
42void qdisc_lock_tree(struct net_device *dev) 42void qdisc_lock_tree(struct net_device *dev)
43 __acquires(dev->queue_lock)
44 __acquires(dev->ingress_lock)
43{ 45{
44 spin_lock_bh(&dev->queue_lock); 46 spin_lock_bh(&dev->queue_lock);
45 spin_lock(&dev->ingress_lock); 47 spin_lock(&dev->ingress_lock);
46} 48}
49EXPORT_SYMBOL(qdisc_lock_tree);
47 50
48void qdisc_unlock_tree(struct net_device *dev) 51void qdisc_unlock_tree(struct net_device *dev)
52 __releases(dev->ingress_lock)
53 __releases(dev->queue_lock)
49{ 54{
50 spin_unlock(&dev->ingress_lock); 55 spin_unlock(&dev->ingress_lock);
51 spin_unlock_bh(&dev->queue_lock); 56 spin_unlock_bh(&dev->queue_lock);
52} 57}
58EXPORT_SYMBOL(qdisc_unlock_tree);
53 59
54static inline int qdisc_qlen(struct Qdisc *q) 60static inline int qdisc_qlen(struct Qdisc *q)
55{ 61{
@@ -134,7 +140,7 @@ static inline int qdisc_restart(struct net_device *dev)
134{ 140{
135 struct Qdisc *q = dev->qdisc; 141 struct Qdisc *q = dev->qdisc;
136 struct sk_buff *skb; 142 struct sk_buff *skb;
137 int ret; 143 int ret = NETDEV_TX_BUSY;
138 144
139 /* Dequeue packet */ 145 /* Dequeue packet */
140 if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) 146 if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
@@ -145,7 +151,8 @@ static inline int qdisc_restart(struct net_device *dev)
145 spin_unlock(&dev->queue_lock); 151 spin_unlock(&dev->queue_lock);
146 152
147 HARD_TX_LOCK(dev, smp_processor_id()); 153 HARD_TX_LOCK(dev, smp_processor_id());
148 ret = dev_hard_start_xmit(skb, dev); 154 if (!netif_subqueue_stopped(dev, skb))
155 ret = dev_hard_start_xmit(skb, dev);
149 HARD_TX_UNLOCK(dev); 156 HARD_TX_UNLOCK(dev);
150 157
151 spin_lock(&dev->queue_lock); 158 spin_lock(&dev->queue_lock);
@@ -210,13 +217,6 @@ static void dev_watchdog(unsigned long arg)
210 dev_put(dev); 217 dev_put(dev);
211} 218}
212 219
213static void dev_watchdog_init(struct net_device *dev)
214{
215 init_timer(&dev->watchdog_timer);
216 dev->watchdog_timer.data = (unsigned long)dev;
217 dev->watchdog_timer.function = dev_watchdog;
218}
219
220void __netdev_watchdog_up(struct net_device *dev) 220void __netdev_watchdog_up(struct net_device *dev)
221{ 221{
222 if (dev->tx_timeout) { 222 if (dev->tx_timeout) {
@@ -255,6 +255,7 @@ void netif_carrier_on(struct net_device *dev)
255 __netdev_watchdog_up(dev); 255 __netdev_watchdog_up(dev);
256 } 256 }
257} 257}
258EXPORT_SYMBOL(netif_carrier_on);
258 259
259/** 260/**
260 * netif_carrier_off - clear carrier 261 * netif_carrier_off - clear carrier
@@ -267,6 +268,7 @@ void netif_carrier_off(struct net_device *dev)
267 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) 268 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
268 linkwatch_fire_event(dev); 269 linkwatch_fire_event(dev);
269} 270}
271EXPORT_SYMBOL(netif_carrier_off);
270 272
271/* "NOOP" scheduler: the best scheduler, recommended for all interfaces 273/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
272 under all circumstances. It is difficult to invent anything faster or 274 under all circumstances. It is difficult to invent anything faster or
@@ -293,7 +295,7 @@ static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
293 return NET_XMIT_CN; 295 return NET_XMIT_CN;
294} 296}
295 297
296struct Qdisc_ops noop_qdisc_ops = { 298struct Qdisc_ops noop_qdisc_ops __read_mostly = {
297 .id = "noop", 299 .id = "noop",
298 .priv_size = 0, 300 .priv_size = 0,
299 .enqueue = noop_enqueue, 301 .enqueue = noop_enqueue,
@@ -309,8 +311,9 @@ struct Qdisc noop_qdisc = {
309 .ops = &noop_qdisc_ops, 311 .ops = &noop_qdisc_ops,
310 .list = LIST_HEAD_INIT(noop_qdisc.list), 312 .list = LIST_HEAD_INIT(noop_qdisc.list),
311}; 313};
314EXPORT_SYMBOL(noop_qdisc);
312 315
313static struct Qdisc_ops noqueue_qdisc_ops = { 316static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
314 .id = "noqueue", 317 .id = "noqueue",
315 .priv_size = 0, 318 .priv_size = 0,
316 .enqueue = noop_enqueue, 319 .enqueue = noop_enqueue,
@@ -394,14 +397,14 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
394 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 397 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
395 398
396 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 399 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
397 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 400 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
398 return skb->len; 401 return skb->len;
399 402
400rtattr_failure: 403nla_put_failure:
401 return -1; 404 return -1;
402} 405}
403 406
404static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 407static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
405{ 408{
406 int prio; 409 int prio;
407 struct sk_buff_head *list = qdisc_priv(qdisc); 410 struct sk_buff_head *list = qdisc_priv(qdisc);
@@ -412,7 +415,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
412 return 0; 415 return 0;
413} 416}
414 417
415static struct Qdisc_ops pfifo_fast_ops = { 418static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
416 .id = "pfifo_fast", 419 .id = "pfifo_fast",
417 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), 420 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
418 .enqueue = pfifo_fast_enqueue, 421 .enqueue = pfifo_fast_enqueue,
@@ -473,16 +476,18 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
473errout: 476errout:
474 return NULL; 477 return NULL;
475} 478}
479EXPORT_SYMBOL(qdisc_create_dflt);
476 480
477/* Under dev->queue_lock and BH! */ 481/* Under dev->queue_lock and BH! */
478 482
479void qdisc_reset(struct Qdisc *qdisc) 483void qdisc_reset(struct Qdisc *qdisc)
480{ 484{
481 struct Qdisc_ops *ops = qdisc->ops; 485 const struct Qdisc_ops *ops = qdisc->ops;
482 486
483 if (ops->reset) 487 if (ops->reset)
484 ops->reset(qdisc); 488 ops->reset(qdisc);
485} 489}
490EXPORT_SYMBOL(qdisc_reset);
486 491
487/* this is the rcu callback function to clean up a qdisc when there 492/* this is the rcu callback function to clean up a qdisc when there
488 * are no further references to it */ 493 * are no further references to it */
@@ -497,7 +502,7 @@ static void __qdisc_destroy(struct rcu_head *head)
497 502
498void qdisc_destroy(struct Qdisc *qdisc) 503void qdisc_destroy(struct Qdisc *qdisc)
499{ 504{
500 struct Qdisc_ops *ops = qdisc->ops; 505 const struct Qdisc_ops *ops = qdisc->ops;
501 506
502 if (qdisc->flags & TCQ_F_BUILTIN || 507 if (qdisc->flags & TCQ_F_BUILTIN ||
503 !atomic_dec_and_test(&qdisc->refcnt)) 508 !atomic_dec_and_test(&qdisc->refcnt))
@@ -514,6 +519,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
514 dev_put(qdisc->dev); 519 dev_put(qdisc->dev);
515 call_rcu(&qdisc->q_rcu, __qdisc_destroy); 520 call_rcu(&qdisc->q_rcu, __qdisc_destroy);
516} 521}
522EXPORT_SYMBOL(qdisc_destroy);
517 523
518void dev_activate(struct net_device *dev) 524void dev_activate(struct net_device *dev)
519{ 525{
@@ -607,7 +613,7 @@ void dev_init_scheduler(struct net_device *dev)
607 INIT_LIST_HEAD(&dev->qdisc_list); 613 INIT_LIST_HEAD(&dev->qdisc_list);
608 qdisc_unlock_tree(dev); 614 qdisc_unlock_tree(dev);
609 615
610 dev_watchdog_init(dev); 616 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
611} 617}
612 618
613void dev_shutdown(struct net_device *dev) 619void dev_shutdown(struct net_device *dev)
@@ -628,12 +634,3 @@ void dev_shutdown(struct net_device *dev)
628 BUG_TRAP(!timer_pending(&dev->watchdog_timer)); 634 BUG_TRAP(!timer_pending(&dev->watchdog_timer));
629 qdisc_unlock_tree(dev); 635 qdisc_unlock_tree(dev);
630} 636}
631
632EXPORT_SYMBOL(netif_carrier_on);
633EXPORT_SYMBOL(netif_carrier_off);
634EXPORT_SYMBOL(noop_qdisc);
635EXPORT_SYMBOL(qdisc_create_dflt);
636EXPORT_SYMBOL(qdisc_destroy);
637EXPORT_SYMBOL(qdisc_reset);
638EXPORT_SYMBOL(qdisc_lock_tree);
639EXPORT_SYMBOL(qdisc_unlock_tree);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3cc6dda02e2e..3a9d226ff1e4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -350,16 +350,16 @@ static inline void gred_destroy_vq(struct gred_sched_data *q)
350 kfree(q); 350 kfree(q);
351} 351}
352 352
353static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) 353static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
354{ 354{
355 struct gred_sched *table = qdisc_priv(sch); 355 struct gred_sched *table = qdisc_priv(sch);
356 struct tc_gred_sopt *sopt; 356 struct tc_gred_sopt *sopt;
357 int i; 357 int i;
358 358
359 if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) 359 if (dps == NULL)
360 return -EINVAL; 360 return -EINVAL;
361 361
362 sopt = RTA_DATA(dps); 362 sopt = nla_data(dps);
363 363
364 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) 364 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
365 return -EINVAL; 365 return -EINVAL;
@@ -425,28 +425,37 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
425 return 0; 425 return 0;
426} 426}
427 427
428static int gred_change(struct Qdisc *sch, struct rtattr *opt) 428static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
429 [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
430 [TCA_GRED_STAB] = { .len = 256 },
431 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
432};
433
434static int gred_change(struct Qdisc *sch, struct nlattr *opt)
429{ 435{
430 struct gred_sched *table = qdisc_priv(sch); 436 struct gred_sched *table = qdisc_priv(sch);
431 struct tc_gred_qopt *ctl; 437 struct tc_gred_qopt *ctl;
432 struct rtattr *tb[TCA_GRED_MAX]; 438 struct nlattr *tb[TCA_GRED_MAX + 1];
433 int err = -EINVAL, prio = GRED_DEF_PRIO; 439 int err, prio = GRED_DEF_PRIO;
434 u8 *stab; 440 u8 *stab;
435 441
436 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 442 if (opt == NULL)
437 return -EINVAL; 443 return -EINVAL;
438 444
439 if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) 445 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
446 if (err < 0)
447 return err;
448
449 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
440 return gred_change_table_def(sch, opt); 450 return gred_change_table_def(sch, opt);
441 451
442 if (tb[TCA_GRED_PARMS-1] == NULL || 452 if (tb[TCA_GRED_PARMS] == NULL ||
443 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 453 tb[TCA_GRED_STAB] == NULL)
444 tb[TCA_GRED_STAB-1] == NULL ||
445 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
446 return -EINVAL; 454 return -EINVAL;
447 455
448 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 456 err = -EINVAL;
449 stab = RTA_DATA(tb[TCA_GRED_STAB-1]); 457 ctl = nla_data(tb[TCA_GRED_PARMS]);
458 stab = nla_data(tb[TCA_GRED_STAB]);
450 459
451 if (ctl->DP >= table->DPs) 460 if (ctl->DP >= table->DPs)
452 goto errout; 461 goto errout;
@@ -486,23 +495,28 @@ errout:
486 return err; 495 return err;
487} 496}
488 497
489static int gred_init(struct Qdisc *sch, struct rtattr *opt) 498static int gred_init(struct Qdisc *sch, struct nlattr *opt)
490{ 499{
491 struct rtattr *tb[TCA_GRED_MAX]; 500 struct nlattr *tb[TCA_GRED_MAX + 1];
501 int err;
492 502
493 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 503 if (opt == NULL)
494 return -EINVAL; 504 return -EINVAL;
495 505
496 if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) 506 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
507 if (err < 0)
508 return err;
509
510 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
497 return -EINVAL; 511 return -EINVAL;
498 512
499 return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); 513 return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
500} 514}
501 515
502static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 516static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
503{ 517{
504 struct gred_sched *table = qdisc_priv(sch); 518 struct gred_sched *table = qdisc_priv(sch);
505 struct rtattr *parms, *opts = NULL; 519 struct nlattr *parms, *opts = NULL;
506 int i; 520 int i;
507 struct tc_gred_sopt sopt = { 521 struct tc_gred_sopt sopt = {
508 .DPs = table->DPs, 522 .DPs = table->DPs,
@@ -511,9 +525,13 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
511 .flags = table->red_flags, 525 .flags = table->red_flags,
512 }; 526 };
513 527
514 opts = RTA_NEST(skb, TCA_OPTIONS); 528 opts = nla_nest_start(skb, TCA_OPTIONS);
515 RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); 529 if (opts == NULL)
516 parms = RTA_NEST(skb, TCA_GRED_PARMS); 530 goto nla_put_failure;
531 NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
532 parms = nla_nest_start(skb, TCA_GRED_PARMS);
533 if (parms == NULL)
534 goto nla_put_failure;
517 535
518 for (i = 0; i < MAX_DPs; i++) { 536 for (i = 0; i < MAX_DPs; i++) {
519 struct gred_sched_data *q = table->tab[i]; 537 struct gred_sched_data *q = table->tab[i];
@@ -555,15 +573,16 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
555 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); 573 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
556 574
557append_opt: 575append_opt:
558 RTA_APPEND(skb, sizeof(opt), &opt); 576 if (nla_append(skb, sizeof(opt), &opt) < 0)
577 goto nla_put_failure;
559 } 578 }
560 579
561 RTA_NEST_END(skb, parms); 580 nla_nest_end(skb, parms);
562 581
563 return RTA_NEST_END(skb, opts); 582 return nla_nest_end(skb, opts);
564 583
565rtattr_failure: 584nla_put_failure:
566 return RTA_NEST_CANCEL(skb, opts); 585 return nla_nest_cancel(skb, opts);
567} 586}
568 587
569static void gred_destroy(struct Qdisc *sch) 588static void gred_destroy(struct Qdisc *sch)
@@ -577,7 +596,7 @@ static void gred_destroy(struct Qdisc *sch)
577 } 596 }
578} 597}
579 598
580static struct Qdisc_ops gred_qdisc_ops = { 599static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
581 .id = "gred", 600 .id = "gred",
582 .priv_size = sizeof(struct gred_sched), 601 .priv_size = sizeof(struct gred_sched),
583 .enqueue = gred_enqueue, 602 .enqueue = gred_enqueue,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 55e7e4530f43..87293d0db1d7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -160,7 +160,7 @@ struct hfsc_class
160 u64 cl_vtoff; /* inter-period cumulative vt offset */ 160 u64 cl_vtoff; /* inter-period cumulative vt offset */
161 u64 cl_cvtmax; /* max child's vt in the last period */ 161 u64 cl_cvtmax; /* max child's vt in the last period */
162 u64 cl_cvtoff; /* cumulative cvtmax of all periods */ 162 u64 cl_cvtoff; /* cumulative cvtmax of all periods */
163 u64 cl_pcvtoff; /* parent's cvtoff at initalization 163 u64 cl_pcvtoff; /* parent's cvtoff at initialization
164 time */ 164 time */
165 165
166 struct internal_sc cl_rsc; /* internal real-time service curve */ 166 struct internal_sc cl_rsc; /* internal real-time service curve */
@@ -986,41 +986,46 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
986 cl->cl_flags |= HFSC_USC; 986 cl->cl_flags |= HFSC_USC;
987} 987}
988 988
989static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
990 [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
991 [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
992 [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) },
993};
994
989static int 995static int
990hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 996hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
991 struct rtattr **tca, unsigned long *arg) 997 struct nlattr **tca, unsigned long *arg)
992{ 998{
993 struct hfsc_sched *q = qdisc_priv(sch); 999 struct hfsc_sched *q = qdisc_priv(sch);
994 struct hfsc_class *cl = (struct hfsc_class *)*arg; 1000 struct hfsc_class *cl = (struct hfsc_class *)*arg;
995 struct hfsc_class *parent = NULL; 1001 struct hfsc_class *parent = NULL;
996 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1002 struct nlattr *opt = tca[TCA_OPTIONS];
997 struct rtattr *tb[TCA_HFSC_MAX]; 1003 struct nlattr *tb[TCA_HFSC_MAX + 1];
998 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; 1004 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
999 u64 cur_time; 1005 u64 cur_time;
1006 int err;
1000 1007
1001 if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt)) 1008 if (opt == NULL)
1002 return -EINVAL; 1009 return -EINVAL;
1003 1010
1004 if (tb[TCA_HFSC_RSC-1]) { 1011 err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
1005 if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc)) 1012 if (err < 0)
1006 return -EINVAL; 1013 return err;
1007 rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]); 1014
1015 if (tb[TCA_HFSC_RSC]) {
1016 rsc = nla_data(tb[TCA_HFSC_RSC]);
1008 if (rsc->m1 == 0 && rsc->m2 == 0) 1017 if (rsc->m1 == 0 && rsc->m2 == 0)
1009 rsc = NULL; 1018 rsc = NULL;
1010 } 1019 }
1011 1020
1012 if (tb[TCA_HFSC_FSC-1]) { 1021 if (tb[TCA_HFSC_FSC]) {
1013 if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc)) 1022 fsc = nla_data(tb[TCA_HFSC_FSC]);
1014 return -EINVAL;
1015 fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
1016 if (fsc->m1 == 0 && fsc->m2 == 0) 1023 if (fsc->m1 == 0 && fsc->m2 == 0)
1017 fsc = NULL; 1024 fsc = NULL;
1018 } 1025 }
1019 1026
1020 if (tb[TCA_HFSC_USC-1]) { 1027 if (tb[TCA_HFSC_USC]) {
1021 if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc)) 1028 usc = nla_data(tb[TCA_HFSC_USC]);
1022 return -EINVAL;
1023 usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
1024 if (usc->m1 == 0 && usc->m2 == 0) 1029 if (usc->m1 == 0 && usc->m2 == 0)
1025 usc = NULL; 1030 usc = NULL;
1026 } 1031 }
@@ -1050,10 +1055,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1050 } 1055 }
1051 sch_tree_unlock(sch); 1056 sch_tree_unlock(sch);
1052 1057
1053 if (tca[TCA_RATE-1]) 1058 if (tca[TCA_RATE])
1054 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1059 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1055 &sch->dev->queue_lock, 1060 &sch->dev->queue_lock,
1056 tca[TCA_RATE-1]); 1061 tca[TCA_RATE]);
1057 return 0; 1062 return 0;
1058 } 1063 }
1059 1064
@@ -1106,9 +1111,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1106 cl->cl_pcvtoff = parent->cl_cvtoff; 1111 cl->cl_pcvtoff = parent->cl_cvtoff;
1107 sch_tree_unlock(sch); 1112 sch_tree_unlock(sch);
1108 1113
1109 if (tca[TCA_RATE-1]) 1114 if (tca[TCA_RATE])
1110 gen_new_estimator(&cl->bstats, &cl->rate_est, 1115 gen_new_estimator(&cl->bstats, &cl->rate_est,
1111 &sch->dev->queue_lock, tca[TCA_RATE-1]); 1116 &sch->dev->queue_lock, tca[TCA_RATE]);
1112 *arg = (unsigned long)cl; 1117 *arg = (unsigned long)cl;
1113 return 0; 1118 return 0;
1114} 1119}
@@ -1304,11 +1309,11 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1304 tsc.m1 = sm2m(sc->sm1); 1309 tsc.m1 = sm2m(sc->sm1);
1305 tsc.d = dx2d(sc->dx); 1310 tsc.d = dx2d(sc->dx);
1306 tsc.m2 = sm2m(sc->sm2); 1311 tsc.m2 = sm2m(sc->sm2);
1307 RTA_PUT(skb, attr, sizeof(tsc), &tsc); 1312 NLA_PUT(skb, attr, sizeof(tsc), &tsc);
1308 1313
1309 return skb->len; 1314 return skb->len;
1310 1315
1311 rtattr_failure: 1316 nla_put_failure:
1312 return -1; 1317 return -1;
1313} 1318}
1314 1319
@@ -1317,19 +1322,19 @@ hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1317{ 1322{
1318 if ((cl->cl_flags & HFSC_RSC) && 1323 if ((cl->cl_flags & HFSC_RSC) &&
1319 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) 1324 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
1320 goto rtattr_failure; 1325 goto nla_put_failure;
1321 1326
1322 if ((cl->cl_flags & HFSC_FSC) && 1327 if ((cl->cl_flags & HFSC_FSC) &&
1323 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) 1328 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
1324 goto rtattr_failure; 1329 goto nla_put_failure;
1325 1330
1326 if ((cl->cl_flags & HFSC_USC) && 1331 if ((cl->cl_flags & HFSC_USC) &&
1327 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) 1332 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
1328 goto rtattr_failure; 1333 goto nla_put_failure;
1329 1334
1330 return skb->len; 1335 return skb->len;
1331 1336
1332 rtattr_failure: 1337 nla_put_failure:
1333 return -1; 1338 return -1;
1334} 1339}
1335 1340
@@ -1338,22 +1343,23 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1338 struct tcmsg *tcm) 1343 struct tcmsg *tcm)
1339{ 1344{
1340 struct hfsc_class *cl = (struct hfsc_class *)arg; 1345 struct hfsc_class *cl = (struct hfsc_class *)arg;
1341 unsigned char *b = skb_tail_pointer(skb); 1346 struct nlattr *nest;
1342 struct rtattr *rta = (struct rtattr *)b;
1343 1347
1344 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; 1348 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
1345 tcm->tcm_handle = cl->classid; 1349 tcm->tcm_handle = cl->classid;
1346 if (cl->level == 0) 1350 if (cl->level == 0)
1347 tcm->tcm_info = cl->qdisc->handle; 1351 tcm->tcm_info = cl->qdisc->handle;
1348 1352
1349 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1353 nest = nla_nest_start(skb, TCA_OPTIONS);
1354 if (nest == NULL)
1355 goto nla_put_failure;
1350 if (hfsc_dump_curves(skb, cl) < 0) 1356 if (hfsc_dump_curves(skb, cl) < 0)
1351 goto rtattr_failure; 1357 goto nla_put_failure;
1352 rta->rta_len = skb_tail_pointer(skb) - b; 1358 nla_nest_end(skb, nest);
1353 return skb->len; 1359 return skb->len;
1354 1360
1355 rtattr_failure: 1361 nla_put_failure:
1356 nlmsg_trim(skb, b); 1362 nla_nest_cancel(skb, nest);
1357 return -1; 1363 return -1;
1358} 1364}
1359 1365
@@ -1423,15 +1429,15 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1423} 1429}
1424 1430
1425static int 1431static int
1426hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) 1432hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1427{ 1433{
1428 struct hfsc_sched *q = qdisc_priv(sch); 1434 struct hfsc_sched *q = qdisc_priv(sch);
1429 struct tc_hfsc_qopt *qopt; 1435 struct tc_hfsc_qopt *qopt;
1430 unsigned int i; 1436 unsigned int i;
1431 1437
1432 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1438 if (opt == NULL || nla_len(opt) < sizeof(*qopt))
1433 return -EINVAL; 1439 return -EINVAL;
1434 qopt = RTA_DATA(opt); 1440 qopt = nla_data(opt);
1435 1441
1436 q->defcls = qopt->defcls; 1442 q->defcls = qopt->defcls;
1437 for (i = 0; i < HFSC_HSIZE; i++) 1443 for (i = 0; i < HFSC_HSIZE; i++)
@@ -1459,14 +1465,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
1459} 1465}
1460 1466
1461static int 1467static int
1462hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt) 1468hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
1463{ 1469{
1464 struct hfsc_sched *q = qdisc_priv(sch); 1470 struct hfsc_sched *q = qdisc_priv(sch);
1465 struct tc_hfsc_qopt *qopt; 1471 struct tc_hfsc_qopt *qopt;
1466 1472
1467 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1473 if (opt == NULL || nla_len(opt) < sizeof(*qopt))
1468 return -EINVAL; 1474 return -EINVAL;
1469 qopt = RTA_DATA(opt); 1475 qopt = nla_data(opt);
1470 1476
1471 sch_tree_lock(sch); 1477 sch_tree_lock(sch);
1472 q->defcls = qopt->defcls; 1478 q->defcls = qopt->defcls;
@@ -1550,10 +1556,10 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
1550 struct tc_hfsc_qopt qopt; 1556 struct tc_hfsc_qopt qopt;
1551 1557
1552 qopt.defcls = q->defcls; 1558 qopt.defcls = q->defcls;
1553 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 1559 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
1554 return skb->len; 1560 return skb->len;
1555 1561
1556 rtattr_failure: 1562 nla_put_failure:
1557 nlmsg_trim(skb, b); 1563 nlmsg_trim(skb, b);
1558 return -1; 1564 return -1;
1559} 1565}
@@ -1698,7 +1704,7 @@ hfsc_drop(struct Qdisc *sch)
1698 return 0; 1704 return 0;
1699} 1705}
1700 1706
1701static struct Qdisc_class_ops hfsc_class_ops = { 1707static const struct Qdisc_class_ops hfsc_class_ops = {
1702 .change = hfsc_change_class, 1708 .change = hfsc_change_class,
1703 .delete = hfsc_delete_class, 1709 .delete = hfsc_delete_class,
1704 .graft = hfsc_graft_class, 1710 .graft = hfsc_graft_class,
@@ -1714,7 +1720,7 @@ static struct Qdisc_class_ops hfsc_class_ops = {
1714 .walk = hfsc_walk 1720 .walk = hfsc_walk
1715}; 1721};
1716 1722
1717static struct Qdisc_ops hfsc_qdisc_ops = { 1723static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
1718 .id = "hfsc", 1724 .id = "hfsc",
1719 .init = hfsc_init_qdisc, 1725 .init = hfsc_init_qdisc,
1720 .change = hfsc_change_qdisc, 1726 .change = hfsc_change_qdisc,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5e608a64935a..e1a579efc215 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -214,10 +214,6 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
214 * then finish and return direct queue. 214 * then finish and return direct queue.
215 */ 215 */
216#define HTB_DIRECT (struct htb_class*)-1 216#define HTB_DIRECT (struct htb_class*)-1
217static inline u32 htb_classid(struct htb_class *cl)
218{
219 return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
220}
221 217
222static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 218static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
223 int *qerr) 219 int *qerr)
@@ -996,19 +992,33 @@ static void htb_reset(struct Qdisc *sch)
996 INIT_LIST_HEAD(q->drops + i); 992 INIT_LIST_HEAD(q->drops + i);
997} 993}
998 994
999static int htb_init(struct Qdisc *sch, struct rtattr *opt) 995static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
996 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
997 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
998 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
999 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1000};
1001
1002static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1000{ 1003{
1001 struct htb_sched *q = qdisc_priv(sch); 1004 struct htb_sched *q = qdisc_priv(sch);
1002 struct rtattr *tb[TCA_HTB_INIT]; 1005 struct nlattr *tb[TCA_HTB_INIT + 1];
1003 struct tc_htb_glob *gopt; 1006 struct tc_htb_glob *gopt;
1007 int err;
1004 int i; 1008 int i;
1005 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || 1009
1006 tb[TCA_HTB_INIT - 1] == NULL || 1010 if (!opt)
1007 RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { 1011 return -EINVAL;
1012
1013 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1014 if (err < 0)
1015 return err;
1016
1017 if (tb[TCA_HTB_INIT] == NULL) {
1008 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1018 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1009 return -EINVAL; 1019 return -EINVAL;
1010 } 1020 }
1011 gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); 1021 gopt = nla_data(tb[TCA_HTB_INIT]);
1012 if (gopt->version != HTB_VER >> 16) { 1022 if (gopt->version != HTB_VER >> 16) {
1013 printk(KERN_ERR 1023 printk(KERN_ERR
1014 "HTB: need tc/htb version %d (minor is %d), you have %d\n", 1024 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
@@ -1039,25 +1049,29 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1039static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1049static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1040{ 1050{
1041 struct htb_sched *q = qdisc_priv(sch); 1051 struct htb_sched *q = qdisc_priv(sch);
1042 unsigned char *b = skb_tail_pointer(skb); 1052 struct nlattr *nest;
1043 struct rtattr *rta;
1044 struct tc_htb_glob gopt; 1053 struct tc_htb_glob gopt;
1054
1045 spin_lock_bh(&sch->dev->queue_lock); 1055 spin_lock_bh(&sch->dev->queue_lock);
1046 gopt.direct_pkts = q->direct_pkts;
1047 1056
1057 gopt.direct_pkts = q->direct_pkts;
1048 gopt.version = HTB_VER; 1058 gopt.version = HTB_VER;
1049 gopt.rate2quantum = q->rate2quantum; 1059 gopt.rate2quantum = q->rate2quantum;
1050 gopt.defcls = q->defcls; 1060 gopt.defcls = q->defcls;
1051 gopt.debug = 0; 1061 gopt.debug = 0;
1052 rta = (struct rtattr *)b; 1062
1053 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1063 nest = nla_nest_start(skb, TCA_OPTIONS);
1054 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); 1064 if (nest == NULL)
1055 rta->rta_len = skb_tail_pointer(skb) - b; 1065 goto nla_put_failure;
1066 NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1067 nla_nest_end(skb, nest);
1068
1056 spin_unlock_bh(&sch->dev->queue_lock); 1069 spin_unlock_bh(&sch->dev->queue_lock);
1057 return skb->len; 1070 return skb->len;
1058rtattr_failure: 1071
1072nla_put_failure:
1059 spin_unlock_bh(&sch->dev->queue_lock); 1073 spin_unlock_bh(&sch->dev->queue_lock);
1060 nlmsg_trim(skb, skb_tail_pointer(skb)); 1074 nla_nest_cancel(skb, nest);
1061 return -1; 1075 return -1;
1062} 1076}
1063 1077
@@ -1065,8 +1079,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1065 struct sk_buff *skb, struct tcmsg *tcm) 1079 struct sk_buff *skb, struct tcmsg *tcm)
1066{ 1080{
1067 struct htb_class *cl = (struct htb_class *)arg; 1081 struct htb_class *cl = (struct htb_class *)arg;
1068 unsigned char *b = skb_tail_pointer(skb); 1082 struct nlattr *nest;
1069 struct rtattr *rta;
1070 struct tc_htb_opt opt; 1083 struct tc_htb_opt opt;
1071 1084
1072 spin_lock_bh(&sch->dev->queue_lock); 1085 spin_lock_bh(&sch->dev->queue_lock);
@@ -1075,8 +1088,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075 if (!cl->level && cl->un.leaf.q) 1088 if (!cl->level && cl->un.leaf.q)
1076 tcm->tcm_info = cl->un.leaf.q->handle; 1089 tcm->tcm_info = cl->un.leaf.q->handle;
1077 1090
1078 rta = (struct rtattr *)b; 1091 nest = nla_nest_start(skb, TCA_OPTIONS);
1079 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1092 if (nest == NULL)
1093 goto nla_put_failure;
1080 1094
1081 memset(&opt, 0, sizeof(opt)); 1095 memset(&opt, 0, sizeof(opt));
1082 1096
@@ -1087,13 +1101,15 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1087 opt.quantum = cl->un.leaf.quantum; 1101 opt.quantum = cl->un.leaf.quantum;
1088 opt.prio = cl->un.leaf.prio; 1102 opt.prio = cl->un.leaf.prio;
1089 opt.level = cl->level; 1103 opt.level = cl->level;
1090 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); 1104 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1091 rta->rta_len = skb_tail_pointer(skb) - b; 1105
1106 nla_nest_end(skb, nest);
1092 spin_unlock_bh(&sch->dev->queue_lock); 1107 spin_unlock_bh(&sch->dev->queue_lock);
1093 return skb->len; 1108 return skb->len;
1094rtattr_failure: 1109
1110nla_put_failure:
1095 spin_unlock_bh(&sch->dev->queue_lock); 1111 spin_unlock_bh(&sch->dev->queue_lock);
1096 nlmsg_trim(skb, b); 1112 nla_nest_cancel(skb, nest);
1097 return -1; 1113 return -1;
1098} 1114}
1099 1115
@@ -1294,29 +1310,35 @@ static void htb_put(struct Qdisc *sch, unsigned long arg)
1294} 1310}
1295 1311
1296static int htb_change_class(struct Qdisc *sch, u32 classid, 1312static int htb_change_class(struct Qdisc *sch, u32 classid,
1297 u32 parentid, struct rtattr **tca, 1313 u32 parentid, struct nlattr **tca,
1298 unsigned long *arg) 1314 unsigned long *arg)
1299{ 1315{
1300 int err = -EINVAL; 1316 int err = -EINVAL;
1301 struct htb_sched *q = qdisc_priv(sch); 1317 struct htb_sched *q = qdisc_priv(sch);
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1318 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct rtattr *opt = tca[TCA_OPTIONS - 1]; 1319 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1320 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct rtattr *tb[TCA_HTB_RTAB]; 1321 struct nlattr *tb[TCA_HTB_RTAB + 1];
1306 struct tc_htb_opt *hopt; 1322 struct tc_htb_opt *hopt;
1307 1323
1308 /* extract all subattrs from opt attr */ 1324 /* extract all subattrs from opt attr */
1309 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || 1325 if (!opt)
1310 tb[TCA_HTB_PARMS - 1] == NULL || 1326 goto failure;
1311 RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) 1327
1328 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1329 if (err < 0)
1330 goto failure;
1331
1332 err = -EINVAL;
1333 if (tb[TCA_HTB_PARMS] == NULL)
1312 goto failure; 1334 goto failure;
1313 1335
1314 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); 1336 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1315 1337
1316 hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); 1338 hopt = nla_data(tb[TCA_HTB_PARMS]);
1317 1339
1318 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); 1340 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1319 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); 1341 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1320 if (!rtab || !ctab) 1342 if (!rtab || !ctab)
1321 goto failure; 1343 goto failure;
1322 1344
@@ -1324,12 +1346,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1324 struct Qdisc *new_q; 1346 struct Qdisc *new_q;
1325 int prio; 1347 int prio;
1326 struct { 1348 struct {
1327 struct rtattr rta; 1349 struct nlattr nla;
1328 struct gnet_estimator opt; 1350 struct gnet_estimator opt;
1329 } est = { 1351 } est = {
1330 .rta = { 1352 .nla = {
1331 .rta_len = RTA_LENGTH(sizeof(est.opt)), 1353 .nla_len = nla_attr_size(sizeof(est.opt)),
1332 .rta_type = TCA_RATE, 1354 .nla_type = TCA_RATE,
1333 }, 1355 },
1334 .opt = { 1356 .opt = {
1335 /* 4s interval, 16s averaging constant */ 1357 /* 4s interval, 16s averaging constant */
@@ -1354,7 +1376,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1354 1376
1355 gen_new_estimator(&cl->bstats, &cl->rate_est, 1377 gen_new_estimator(&cl->bstats, &cl->rate_est,
1356 &sch->dev->queue_lock, 1378 &sch->dev->queue_lock,
1357 tca[TCA_RATE-1] ? : &est.rta); 1379 tca[TCA_RATE] ? : &est.nla);
1358 cl->refcnt = 1; 1380 cl->refcnt = 1;
1359 INIT_LIST_HEAD(&cl->sibling); 1381 INIT_LIST_HEAD(&cl->sibling);
1360 INIT_HLIST_NODE(&cl->hlist); 1382 INIT_HLIST_NODE(&cl->hlist);
@@ -1407,10 +1429,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1407 list_add_tail(&cl->sibling, 1429 list_add_tail(&cl->sibling,
1408 parent ? &parent->children : &q->root); 1430 parent ? &parent->children : &q->root);
1409 } else { 1431 } else {
1410 if (tca[TCA_RATE-1]) 1432 if (tca[TCA_RATE])
1411 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1433 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1412 &sch->dev->queue_lock, 1434 &sch->dev->queue_lock,
1413 tca[TCA_RATE-1]); 1435 tca[TCA_RATE]);
1414 sch_tree_lock(sch); 1436 sch_tree_lock(sch);
1415 } 1437 }
1416 1438
@@ -1529,7 +1551,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1529 } 1551 }
1530} 1552}
1531 1553
1532static struct Qdisc_class_ops htb_class_ops = { 1554static const struct Qdisc_class_ops htb_class_ops = {
1533 .graft = htb_graft, 1555 .graft = htb_graft,
1534 .leaf = htb_leaf, 1556 .leaf = htb_leaf,
1535 .qlen_notify = htb_qlen_notify, 1557 .qlen_notify = htb_qlen_notify,
@@ -1545,7 +1567,7 @@ static struct Qdisc_class_ops htb_class_ops = {
1545 .dump_stats = htb_dump_class_stats, 1567 .dump_stats = htb_dump_class_stats,
1546}; 1568};
1547 1569
1548static struct Qdisc_ops htb_qdisc_ops = { 1570static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1549 .next = NULL, 1571 .next = NULL,
1550 .cl_ops = &htb_class_ops, 1572 .cl_ops = &htb_class_ops,
1551 .id = "htb", 1573 .id = "htb",
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3f8335e6ea2e..274b1ddb160c 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,387 +12,148 @@
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <linux/rtnetlink.h> 14#include <linux/rtnetlink.h>
15#include <linux/netfilter_ipv4.h>
16#include <linux/netfilter_ipv6.h>
17#include <linux/netfilter.h>
18#include <net/netlink.h> 15#include <net/netlink.h>
19#include <net/pkt_sched.h> 16#include <net/pkt_sched.h>
20 17
21 18
22#undef DEBUG_INGRESS
23
24#ifdef DEBUG_INGRESS /* control */
25#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
26#else
27#define DPRINTK(format,args...)
28#endif
29
30#if 0 /* data */
31#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
32#else
33#define D2PRINTK(format,args...)
34#endif
35
36
37#define PRIV(sch) qdisc_priv(sch)
38
39
40/* Thanks to Doron Oz for this hack
41*/
42#ifndef CONFIG_NET_CLS_ACT
43#ifdef CONFIG_NETFILTER
44static int nf_registered;
45#endif
46#endif
47
48struct ingress_qdisc_data { 19struct ingress_qdisc_data {
49 struct Qdisc *q;
50 struct tcf_proto *filter_list; 20 struct tcf_proto *filter_list;
51}; 21};
52 22
53
54/* ------------------------- Class/flow operations ------------------------- */ 23/* ------------------------- Class/flow operations ------------------------- */
55 24
56 25static int ingress_graft(struct Qdisc *sch, unsigned long arg,
57static int ingress_graft(struct Qdisc *sch,unsigned long arg, 26 struct Qdisc *new, struct Qdisc **old)
58 struct Qdisc *new,struct Qdisc **old)
59{ 27{
60#ifdef DEBUG_INGRESS 28 return -EOPNOTSUPP;
61 struct ingress_qdisc_data *p = PRIV(sch);
62#endif
63
64 DPRINTK("ingress_graft(sch %p,[qdisc %p],new %p,old %p)\n",
65 sch, p, new, old);
66 DPRINTK("\n ingress_graft: You cannot add qdiscs to classes");
67 return 1;
68} 29}
69 30
70
71static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 31static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
72{ 32{
73 return NULL; 33 return NULL;
74} 34}
75 35
76 36static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
77static unsigned long ingress_get(struct Qdisc *sch,u32 classid)
78{ 37{
79#ifdef DEBUG_INGRESS
80 struct ingress_qdisc_data *p = PRIV(sch);
81#endif
82 DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
83 return TC_H_MIN(classid) + 1; 38 return TC_H_MIN(classid) + 1;
84} 39}
85 40
86
87static unsigned long ingress_bind_filter(struct Qdisc *sch, 41static unsigned long ingress_bind_filter(struct Qdisc *sch,
88 unsigned long parent, u32 classid) 42 unsigned long parent, u32 classid)
89{ 43{
90 return ingress_get(sch, classid); 44 return ingress_get(sch, classid);
91} 45}
92 46
93
94static void ingress_put(struct Qdisc *sch, unsigned long cl) 47static void ingress_put(struct Qdisc *sch, unsigned long cl)
95{ 48{
96} 49}
97 50
98
99static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent, 51static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
100 struct rtattr **tca, unsigned long *arg) 52 struct nlattr **tca, unsigned long *arg)
101{ 53{
102#ifdef DEBUG_INGRESS
103 struct ingress_qdisc_data *p = PRIV(sch);
104#endif
105 DPRINTK("ingress_change(sch %p,[qdisc %p],classid %x,parent %x),"
106 "arg 0x%lx\n", sch, p, classid, parent, *arg);
107 DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
108 return 0; 54 return 0;
109} 55}
110 56
111 57static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
112
113static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
114{ 58{
115#ifdef DEBUG_INGRESS 59 return;
116 struct ingress_qdisc_data *p = PRIV(sch);
117#endif
118 DPRINTK("ingress_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
119 DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
120} 60}
121 61
122 62static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
123static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch,unsigned long cl)
124{ 63{
125 struct ingress_qdisc_data *p = PRIV(sch); 64 struct ingress_qdisc_data *p = qdisc_priv(sch);
126 65
127 return &p->filter_list; 66 return &p->filter_list;
128} 67}
129 68
130
131/* --------------------------- Qdisc operations ---------------------------- */ 69/* --------------------------- Qdisc operations ---------------------------- */
132 70
133 71static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
134static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
135{ 72{
136 struct ingress_qdisc_data *p = PRIV(sch); 73 struct ingress_qdisc_data *p = qdisc_priv(sch);
137 struct tcf_result res; 74 struct tcf_result res;
138 int result; 75 int result;
139 76
140 D2PRINTK("ingress_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
141 result = tc_classify(skb, p->filter_list, &res); 77 result = tc_classify(skb, p->filter_list, &res);
142 D2PRINTK("result %d class 0x%04x\n", result, res.classid); 78
143 /*
144 * Unlike normal "enqueue" functions, ingress_enqueue returns a
145 * firewall FW_* code.
146 */
147#ifdef CONFIG_NET_CLS_ACT
148 sch->bstats.packets++; 79 sch->bstats.packets++;
149 sch->bstats.bytes += skb->len; 80 sch->bstats.bytes += skb->len;
150 switch (result) { 81 switch (result) {
151 case TC_ACT_SHOT: 82 case TC_ACT_SHOT:
152 result = TC_ACT_SHOT; 83 result = TC_ACT_SHOT;
153 sch->qstats.drops++; 84 sch->qstats.drops++;
154 break; 85 break;
155 case TC_ACT_STOLEN: 86 case TC_ACT_STOLEN:
156 case TC_ACT_QUEUED: 87 case TC_ACT_QUEUED:
157 result = TC_ACT_STOLEN; 88 result = TC_ACT_STOLEN;
158 break; 89 break;
159 case TC_ACT_RECLASSIFY: 90 case TC_ACT_RECLASSIFY:
160 case TC_ACT_OK: 91 case TC_ACT_OK:
161 skb->tc_index = TC_H_MIN(res.classid); 92 skb->tc_index = TC_H_MIN(res.classid);
162 default: 93 default:
163 result = TC_ACT_OK; 94 result = TC_ACT_OK;
164 break; 95 break;
165 } 96 }
166#else
167 D2PRINTK("Overriding result to ACCEPT\n");
168 result = NF_ACCEPT;
169 sch->bstats.packets++;
170 sch->bstats.bytes += skb->len;
171#endif
172 97
173 return result; 98 return result;
174} 99}
175 100
176
177static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
178{
179/*
180 struct ingress_qdisc_data *p = PRIV(sch);
181 D2PRINTK("ingress_dequeue(sch %p,[qdisc %p])\n",sch,PRIV(p));
182*/
183 return NULL;
184}
185
186
187static int ingress_requeue(struct sk_buff *skb,struct Qdisc *sch)
188{
189/*
190 struct ingress_qdisc_data *p = PRIV(sch);
191 D2PRINTK("ingress_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,PRIV(p));
192*/
193 return 0;
194}
195
196static unsigned int ingress_drop(struct Qdisc *sch)
197{
198#ifdef DEBUG_INGRESS
199 struct ingress_qdisc_data *p = PRIV(sch);
200#endif
201 DPRINTK("ingress_drop(sch %p,[qdisc %p])\n", sch, p);
202 return 0;
203}
204
205#ifndef CONFIG_NET_CLS_ACT
206#ifdef CONFIG_NETFILTER
207static unsigned int
208ing_hook(unsigned int hook, struct sk_buff *skb,
209 const struct net_device *indev,
210 const struct net_device *outdev,
211 int (*okfn)(struct sk_buff *))
212{
213
214 struct Qdisc *q;
215 struct net_device *dev = skb->dev;
216 int fwres=NF_ACCEPT;
217
218 DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
219 skb->sk ? "(owned)" : "(unowned)",
220 skb->dev ? skb->dev->name : "(no dev)",
221 skb->len);
222
223 if (dev->qdisc_ingress) {
224 spin_lock(&dev->ingress_lock);
225 if ((q = dev->qdisc_ingress) != NULL)
226 fwres = q->enqueue(skb, q);
227 spin_unlock(&dev->ingress_lock);
228 }
229
230 return fwres;
231}
232
233/* after ipt_filter */
234static struct nf_hook_ops ing_ops = {
235 .hook = ing_hook,
236 .owner = THIS_MODULE,
237 .pf = PF_INET,
238 .hooknum = NF_IP_PRE_ROUTING,
239 .priority = NF_IP_PRI_FILTER + 1,
240};
241
242static struct nf_hook_ops ing6_ops = {
243 .hook = ing_hook,
244 .owner = THIS_MODULE,
245 .pf = PF_INET6,
246 .hooknum = NF_IP6_PRE_ROUTING,
247 .priority = NF_IP6_PRI_FILTER + 1,
248};
249
250#endif
251#endif
252
253static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
254{
255 struct ingress_qdisc_data *p = PRIV(sch);
256
257/* Make sure either netfilter or preferably CLS_ACT is
258* compiled in */
259#ifndef CONFIG_NET_CLS_ACT
260#ifndef CONFIG_NETFILTER
261 printk("You MUST compile classifier actions into the kernel\n");
262 return -EINVAL;
263#else
264 printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
265#endif
266#endif
267
268#ifndef CONFIG_NET_CLS_ACT
269#ifdef CONFIG_NETFILTER
270 if (!nf_registered) {
271 if (nf_register_hook(&ing_ops) < 0) {
272 printk("ingress qdisc registration error \n");
273 return -EINVAL;
274 }
275 nf_registered++;
276
277 if (nf_register_hook(&ing6_ops) < 0) {
278 printk("IPv6 ingress qdisc registration error, " \
279 "disabling IPv6 support.\n");
280 } else
281 nf_registered++;
282 }
283#endif
284#endif
285
286 DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
287 p->q = &noop_qdisc;
288 return 0;
289}
290
291
292static void ingress_reset(struct Qdisc *sch)
293{
294 struct ingress_qdisc_data *p = PRIV(sch);
295
296 DPRINTK("ingress_reset(sch %p,[qdisc %p])\n", sch, p);
297
298/*
299#if 0
300*/
301/* for future use */
302 qdisc_reset(p->q);
303/*
304#endif
305*/
306}
307
308/* ------------------------------------------------------------- */
309
310
311/* ------------------------------------------------------------- */ 101/* ------------------------------------------------------------- */
312 102
313static void ingress_destroy(struct Qdisc *sch) 103static void ingress_destroy(struct Qdisc *sch)
314{ 104{
315 struct ingress_qdisc_data *p = PRIV(sch); 105 struct ingress_qdisc_data *p = qdisc_priv(sch);
316 106
317 DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
318 tcf_destroy_chain(p->filter_list); 107 tcf_destroy_chain(p->filter_list);
319#if 0
320/* for future use */
321 qdisc_destroy(p->q);
322#endif
323} 108}
324 109
325
326static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) 110static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
327{ 111{
328 unsigned char *b = skb_tail_pointer(skb); 112 struct nlattr *nest;
329 struct rtattr *rta;
330 113
331 rta = (struct rtattr *) b; 114 nest = nla_nest_start(skb, TCA_OPTIONS);
332 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 115 if (nest == NULL)
333 rta->rta_len = skb_tail_pointer(skb) - b; 116 goto nla_put_failure;
117 nla_nest_end(skb, nest);
334 return skb->len; 118 return skb->len;
335 119
336rtattr_failure: 120nla_put_failure:
337 nlmsg_trim(skb, b); 121 nla_nest_cancel(skb, nest);
338 return -1; 122 return -1;
339} 123}
340 124
341static struct Qdisc_class_ops ingress_class_ops = { 125static const struct Qdisc_class_ops ingress_class_ops = {
342 .graft = ingress_graft, 126 .graft = ingress_graft,
343 .leaf = ingress_leaf, 127 .leaf = ingress_leaf,
344 .get = ingress_get, 128 .get = ingress_get,
345 .put = ingress_put, 129 .put = ingress_put,
346 .change = ingress_change, 130 .change = ingress_change,
347 .delete = NULL,
348 .walk = ingress_walk, 131 .walk = ingress_walk,
349 .tcf_chain = ingress_find_tcf, 132 .tcf_chain = ingress_find_tcf,
350 .bind_tcf = ingress_bind_filter, 133 .bind_tcf = ingress_bind_filter,
351 .unbind_tcf = ingress_put, 134 .unbind_tcf = ingress_put,
352 .dump = NULL,
353}; 135};
354 136
355static struct Qdisc_ops ingress_qdisc_ops = { 137static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
356 .next = NULL,
357 .cl_ops = &ingress_class_ops, 138 .cl_ops = &ingress_class_ops,
358 .id = "ingress", 139 .id = "ingress",
359 .priv_size = sizeof(struct ingress_qdisc_data), 140 .priv_size = sizeof(struct ingress_qdisc_data),
360 .enqueue = ingress_enqueue, 141 .enqueue = ingress_enqueue,
361 .dequeue = ingress_dequeue,
362 .requeue = ingress_requeue,
363 .drop = ingress_drop,
364 .init = ingress_init,
365 .reset = ingress_reset,
366 .destroy = ingress_destroy, 142 .destroy = ingress_destroy,
367 .change = NULL,
368 .dump = ingress_dump, 143 .dump = ingress_dump,
369 .owner = THIS_MODULE, 144 .owner = THIS_MODULE,
370}; 145};
371 146
372static int __init ingress_module_init(void) 147static int __init ingress_module_init(void)
373{ 148{
374 int ret = 0; 149 return register_qdisc(&ingress_qdisc_ops);
375
376 if ((ret = register_qdisc(&ingress_qdisc_ops)) < 0) {
377 printk("Unable to register Ingress qdisc\n");
378 return ret;
379 }
380
381 return ret;
382} 150}
151
383static void __exit ingress_module_exit(void) 152static void __exit ingress_module_exit(void)
384{ 153{
385 unregister_qdisc(&ingress_qdisc_ops); 154 unregister_qdisc(&ingress_qdisc_ops);
386#ifndef CONFIG_NET_CLS_ACT
387#ifdef CONFIG_NETFILTER
388 if (nf_registered) {
389 nf_unregister_hook(&ing_ops);
390 if (nf_registered > 1)
391 nf_unregister_hook(&ing6_ops);
392 }
393#endif
394#endif
395} 155}
156
396module_init(ingress_module_init) 157module_init(ingress_module_init)
397module_exit(ingress_module_exit) 158module_exit(ingress_module_exit)
398MODULE_LICENSE("GPL"); 159MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9e5e87e81f00..c9c649b26eaa 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -313,21 +313,21 @@ static void netem_reset(struct Qdisc *sch)
313/* Pass size change message down to embedded FIFO */ 313/* Pass size change message down to embedded FIFO */
314static int set_fifo_limit(struct Qdisc *q, int limit) 314static int set_fifo_limit(struct Qdisc *q, int limit)
315{ 315{
316 struct rtattr *rta; 316 struct nlattr *nla;
317 int ret = -ENOMEM; 317 int ret = -ENOMEM;
318 318
319 /* Hack to avoid sending change message to non-FIFO */ 319 /* Hack to avoid sending change message to non-FIFO */
320 if (strncmp(q->ops->id + 1, "fifo", 4) != 0) 320 if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
321 return 0; 321 return 0;
322 322
323 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 323 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
324 if (rta) { 324 if (nla) {
325 rta->rta_type = RTM_NEWQDISC; 325 nla->nla_type = RTM_NEWQDISC;
326 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 326 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
327 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 327 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
328 328
329 ret = q->ops->change(q, rta); 329 ret = q->ops->change(q, nla);
330 kfree(rta); 330 kfree(nla);
331 } 331 }
332 return ret; 332 return ret;
333} 333}
@@ -336,11 +336,11 @@ static int set_fifo_limit(struct Qdisc *q, int limit)
336 * Distribution data is a variable size payload containing 336 * Distribution data is a variable size payload containing
337 * signed 16 bit values. 337 * signed 16 bit values.
338 */ 338 */
339static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr) 339static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
340{ 340{
341 struct netem_sched_data *q = qdisc_priv(sch); 341 struct netem_sched_data *q = qdisc_priv(sch);
342 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16); 342 unsigned long n = nla_len(attr)/sizeof(__s16);
343 const __s16 *data = RTA_DATA(attr); 343 const __s16 *data = nla_data(attr);
344 struct disttable *d; 344 struct disttable *d;
345 int i; 345 int i;
346 346
@@ -363,13 +363,10 @@ static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
363 return 0; 363 return 0;
364} 364}
365 365
366static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) 366static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
367{ 367{
368 struct netem_sched_data *q = qdisc_priv(sch); 368 struct netem_sched_data *q = qdisc_priv(sch);
369 const struct tc_netem_corr *c = RTA_DATA(attr); 369 const struct tc_netem_corr *c = nla_data(attr);
370
371 if (RTA_PAYLOAD(attr) != sizeof(*c))
372 return -EINVAL;
373 370
374 init_crandom(&q->delay_cor, c->delay_corr); 371 init_crandom(&q->delay_cor, c->delay_corr);
375 init_crandom(&q->loss_cor, c->loss_corr); 372 init_crandom(&q->loss_cor, c->loss_corr);
@@ -377,43 +374,48 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
377 return 0; 374 return 0;
378} 375}
379 376
380static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) 377static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
381{ 378{
382 struct netem_sched_data *q = qdisc_priv(sch); 379 struct netem_sched_data *q = qdisc_priv(sch);
383 const struct tc_netem_reorder *r = RTA_DATA(attr); 380 const struct tc_netem_reorder *r = nla_data(attr);
384
385 if (RTA_PAYLOAD(attr) != sizeof(*r))
386 return -EINVAL;
387 381
388 q->reorder = r->probability; 382 q->reorder = r->probability;
389 init_crandom(&q->reorder_cor, r->correlation); 383 init_crandom(&q->reorder_cor, r->correlation);
390 return 0; 384 return 0;
391} 385}
392 386
393static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) 387static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
394{ 388{
395 struct netem_sched_data *q = qdisc_priv(sch); 389 struct netem_sched_data *q = qdisc_priv(sch);
396 const struct tc_netem_corrupt *r = RTA_DATA(attr); 390 const struct tc_netem_corrupt *r = nla_data(attr);
397
398 if (RTA_PAYLOAD(attr) != sizeof(*r))
399 return -EINVAL;
400 391
401 q->corrupt = r->probability; 392 q->corrupt = r->probability;
402 init_crandom(&q->corrupt_cor, r->correlation); 393 init_crandom(&q->corrupt_cor, r->correlation);
403 return 0; 394 return 0;
404} 395}
405 396
397static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
398 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
399 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
400 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
401};
402
406/* Parse netlink message to set options */ 403/* Parse netlink message to set options */
407static int netem_change(struct Qdisc *sch, struct rtattr *opt) 404static int netem_change(struct Qdisc *sch, struct nlattr *opt)
408{ 405{
409 struct netem_sched_data *q = qdisc_priv(sch); 406 struct netem_sched_data *q = qdisc_priv(sch);
407 struct nlattr *tb[TCA_NETEM_MAX + 1];
410 struct tc_netem_qopt *qopt; 408 struct tc_netem_qopt *qopt;
411 int ret; 409 int ret;
412 410
413 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 411 if (opt == NULL)
414 return -EINVAL; 412 return -EINVAL;
415 413
416 qopt = RTA_DATA(opt); 414 ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
415 qopt, sizeof(*qopt));
416 if (ret < 0)
417 return ret;
418
417 ret = set_fifo_limit(q->qdisc, qopt->limit); 419 ret = set_fifo_limit(q->qdisc, qopt->limit);
418 if (ret) { 420 if (ret) {
419 pr_debug("netem: can't set fifo limit\n"); 421 pr_debug("netem: can't set fifo limit\n");
@@ -434,39 +436,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
434 if (q->gap) 436 if (q->gap)
435 q->reorder = ~0; 437 q->reorder = ~0;
436 438
437 /* Handle nested options after initial queue options. 439 if (tb[TCA_NETEM_CORR]) {
438 * Should have put all options in nested format but too late now. 440 ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
439 */ 441 if (ret)
440 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) { 442 return ret;
441 struct rtattr *tb[TCA_NETEM_MAX]; 443 }
442 if (rtattr_parse(tb, TCA_NETEM_MAX,
443 RTA_DATA(opt) + sizeof(*qopt),
444 RTA_PAYLOAD(opt) - sizeof(*qopt)))
445 return -EINVAL;
446
447 if (tb[TCA_NETEM_CORR-1]) {
448 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
449 if (ret)
450 return ret;
451 }
452 444
453 if (tb[TCA_NETEM_DELAY_DIST-1]) { 445 if (tb[TCA_NETEM_DELAY_DIST]) {
454 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]); 446 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
455 if (ret) 447 if (ret)
456 return ret; 448 return ret;
457 } 449 }
458 450
459 if (tb[TCA_NETEM_REORDER-1]) { 451 if (tb[TCA_NETEM_REORDER]) {
460 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); 452 ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
461 if (ret) 453 if (ret)
462 return ret; 454 return ret;
463 } 455 }
464 456
465 if (tb[TCA_NETEM_CORRUPT-1]) { 457 if (tb[TCA_NETEM_CORRUPT]) {
466 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); 458 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
467 if (ret) 459 if (ret)
468 return ret; 460 return ret;
469 }
470 } 461 }
471 462
472 return 0; 463 return 0;
@@ -515,13 +506,13 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
515 return qdisc_reshape_fail(nskb, sch); 506 return qdisc_reshape_fail(nskb, sch);
516} 507}
517 508
518static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) 509static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
519{ 510{
520 struct fifo_sched_data *q = qdisc_priv(sch); 511 struct fifo_sched_data *q = qdisc_priv(sch);
521 512
522 if (opt) { 513 if (opt) {
523 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 514 struct tc_fifo_qopt *ctl = nla_data(opt);
524 if (RTA_PAYLOAD(opt) < sizeof(*ctl)) 515 if (nla_len(opt) < sizeof(*ctl))
525 return -EINVAL; 516 return -EINVAL;
526 517
527 q->limit = ctl->limit; 518 q->limit = ctl->limit;
@@ -537,14 +528,14 @@ static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
537 struct fifo_sched_data *q = qdisc_priv(sch); 528 struct fifo_sched_data *q = qdisc_priv(sch);
538 struct tc_fifo_qopt opt = { .limit = q->limit }; 529 struct tc_fifo_qopt opt = { .limit = q->limit };
539 530
540 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 531 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
541 return skb->len; 532 return skb->len;
542 533
543rtattr_failure: 534nla_put_failure:
544 return -1; 535 return -1;
545} 536}
546 537
547static struct Qdisc_ops tfifo_qdisc_ops = { 538static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
548 .id = "tfifo", 539 .id = "tfifo",
549 .priv_size = sizeof(struct fifo_sched_data), 540 .priv_size = sizeof(struct fifo_sched_data),
550 .enqueue = tfifo_enqueue, 541 .enqueue = tfifo_enqueue,
@@ -557,7 +548,7 @@ static struct Qdisc_ops tfifo_qdisc_ops = {
557 .dump = tfifo_dump, 548 .dump = tfifo_dump,
558}; 549};
559 550
560static int netem_init(struct Qdisc *sch, struct rtattr *opt) 551static int netem_init(struct Qdisc *sch, struct nlattr *opt)
561{ 552{
562 struct netem_sched_data *q = qdisc_priv(sch); 553 struct netem_sched_data *q = qdisc_priv(sch);
563 int ret; 554 int ret;
@@ -595,7 +586,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
595{ 586{
596 const struct netem_sched_data *q = qdisc_priv(sch); 587 const struct netem_sched_data *q = qdisc_priv(sch);
597 unsigned char *b = skb_tail_pointer(skb); 588 unsigned char *b = skb_tail_pointer(skb);
598 struct rtattr *rta = (struct rtattr *) b; 589 struct nlattr *nla = (struct nlattr *) b;
599 struct tc_netem_qopt qopt; 590 struct tc_netem_qopt qopt;
600 struct tc_netem_corr cor; 591 struct tc_netem_corr cor;
601 struct tc_netem_reorder reorder; 592 struct tc_netem_reorder reorder;
@@ -607,26 +598,26 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
607 qopt.loss = q->loss; 598 qopt.loss = q->loss;
608 qopt.gap = q->gap; 599 qopt.gap = q->gap;
609 qopt.duplicate = q->duplicate; 600 qopt.duplicate = q->duplicate;
610 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 601 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
611 602
612 cor.delay_corr = q->delay_cor.rho; 603 cor.delay_corr = q->delay_cor.rho;
613 cor.loss_corr = q->loss_cor.rho; 604 cor.loss_corr = q->loss_cor.rho;
614 cor.dup_corr = q->dup_cor.rho; 605 cor.dup_corr = q->dup_cor.rho;
615 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 606 NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
616 607
617 reorder.probability = q->reorder; 608 reorder.probability = q->reorder;
618 reorder.correlation = q->reorder_cor.rho; 609 reorder.correlation = q->reorder_cor.rho;
619 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 610 NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
620 611
621 corrupt.probability = q->corrupt; 612 corrupt.probability = q->corrupt;
622 corrupt.correlation = q->corrupt_cor.rho; 613 corrupt.correlation = q->corrupt_cor.rho;
623 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 614 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
624 615
625 rta->rta_len = skb_tail_pointer(skb) - b; 616 nla->nla_len = skb_tail_pointer(skb) - b;
626 617
627 return skb->len; 618 return skb->len;
628 619
629rtattr_failure: 620nla_put_failure:
630 nlmsg_trim(skb, b); 621 nlmsg_trim(skb, b);
631 return -1; 622 return -1;
632} 623}
@@ -678,7 +669,7 @@ static void netem_put(struct Qdisc *sch, unsigned long arg)
678} 669}
679 670
680static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 671static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
681 struct rtattr **tca, unsigned long *arg) 672 struct nlattr **tca, unsigned long *arg)
682{ 673{
683 return -ENOSYS; 674 return -ENOSYS;
684} 675}
@@ -705,7 +696,7 @@ static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
705 return NULL; 696 return NULL;
706} 697}
707 698
708static struct Qdisc_class_ops netem_class_ops = { 699static const struct Qdisc_class_ops netem_class_ops = {
709 .graft = netem_graft, 700 .graft = netem_graft,
710 .leaf = netem_leaf, 701 .leaf = netem_leaf,
711 .get = netem_get, 702 .get = netem_get,
@@ -717,7 +708,7 @@ static struct Qdisc_class_ops netem_class_ops = {
717 .dump = netem_dump_class, 708 .dump = netem_dump_class,
718}; 709};
719 710
720static struct Qdisc_ops netem_qdisc_ops = { 711static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
721 .id = "netem", 712 .id = "netem",
722 .cl_ops = &netem_class_ops, 713 .cl_ops = &netem_class_ops,
723 .priv_size = sizeof(struct netem_sched_data), 714 .priv_size = sizeof(struct netem_sched_data),
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de894096e442..4aa2b45dad0a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -224,16 +224,19 @@ prio_destroy(struct Qdisc* sch)
224 qdisc_destroy(q->queues[prio]); 224 qdisc_destroy(q->queues[prio]);
225} 225}
226 226
227static int prio_tune(struct Qdisc *sch, struct rtattr *opt) 227static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
228{ 228{
229 struct prio_sched_data *q = qdisc_priv(sch); 229 struct prio_sched_data *q = qdisc_priv(sch);
230 struct tc_prio_qopt *qopt; 230 struct tc_prio_qopt *qopt;
231 struct rtattr *tb[TCA_PRIO_MAX]; 231 struct nlattr *tb[TCA_PRIO_MAX + 1];
232 int err;
232 int i; 233 int i;
233 234
234 if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, 235 err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
235 sizeof(*qopt))) 236 sizeof(*qopt));
236 return -EINVAL; 237 if (err < 0)
238 return err;
239
237 q->bands = qopt->bands; 240 q->bands = qopt->bands;
238 /* If we're multiqueue, make sure the number of incoming bands 241 /* If we're multiqueue, make sure the number of incoming bands
239 * matches the number of queues on the device we're associating with. 242 * matches the number of queues on the device we're associating with.
@@ -242,7 +245,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
242 * only one that is enabled for multiqueue, since it's the only one 245 * only one that is enabled for multiqueue, since it's the only one
243 * that interacts with the underlying device. 246 * that interacts with the underlying device.
244 */ 247 */
245 q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); 248 q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
246 if (q->mq) { 249 if (q->mq) {
247 if (sch->parent != TC_H_ROOT) 250 if (sch->parent != TC_H_ROOT)
248 return -EINVAL; 251 return -EINVAL;
@@ -296,7 +299,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
296 return 0; 299 return 0;
297} 300}
298 301
299static int prio_init(struct Qdisc *sch, struct rtattr *opt) 302static int prio_init(struct Qdisc *sch, struct nlattr *opt)
300{ 303{
301 struct prio_sched_data *q = qdisc_priv(sch); 304 struct prio_sched_data *q = qdisc_priv(sch);
302 int i; 305 int i;
@@ -319,20 +322,24 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
319{ 322{
320 struct prio_sched_data *q = qdisc_priv(sch); 323 struct prio_sched_data *q = qdisc_priv(sch);
321 unsigned char *b = skb_tail_pointer(skb); 324 unsigned char *b = skb_tail_pointer(skb);
322 struct rtattr *nest; 325 struct nlattr *nest;
323 struct tc_prio_qopt opt; 326 struct tc_prio_qopt opt;
324 327
325 opt.bands = q->bands; 328 opt.bands = q->bands;
326 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 329 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
327 330
328 nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); 331 nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
329 if (q->mq) 332 if (nest == NULL)
330 RTA_PUT_FLAG(skb, TCA_PRIO_MQ); 333 goto nla_put_failure;
331 RTA_NEST_COMPAT_END(skb, nest); 334 if (q->mq) {
335 if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
336 goto nla_put_failure;
337 }
338 nla_nest_compat_end(skb, nest);
332 339
333 return skb->len; 340 return skb->len;
334 341
335rtattr_failure: 342nla_put_failure:
336 nlmsg_trim(skb, b); 343 nlmsg_trim(skb, b);
337 return -1; 344 return -1;
338} 345}
@@ -392,7 +399,7 @@ static void prio_put(struct Qdisc *q, unsigned long cl)
392 return; 399 return;
393} 400}
394 401
395static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg) 402static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg)
396{ 403{
397 unsigned long cl = *arg; 404 unsigned long cl = *arg;
398 struct prio_sched_data *q = qdisc_priv(sch); 405 struct prio_sched_data *q = qdisc_priv(sch);
@@ -468,7 +475,7 @@ static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
468 return &q->filter_list; 475 return &q->filter_list;
469} 476}
470 477
471static struct Qdisc_class_ops prio_class_ops = { 478static const struct Qdisc_class_ops prio_class_ops = {
472 .graft = prio_graft, 479 .graft = prio_graft,
473 .leaf = prio_leaf, 480 .leaf = prio_leaf,
474 .get = prio_get, 481 .get = prio_get,
@@ -483,7 +490,7 @@ static struct Qdisc_class_ops prio_class_ops = {
483 .dump_stats = prio_dump_class_stats, 490 .dump_stats = prio_dump_class_stats,
484}; 491};
485 492
486static struct Qdisc_ops prio_qdisc_ops = { 493static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
487 .next = NULL, 494 .next = NULL,
488 .cl_ops = &prio_class_ops, 495 .cl_ops = &prio_class_ops,
489 .id = "prio", 496 .id = "prio",
@@ -500,7 +507,7 @@ static struct Qdisc_ops prio_qdisc_ops = {
500 .owner = THIS_MODULE, 507 .owner = THIS_MODULE,
501}; 508};
502 509
503static struct Qdisc_ops rr_qdisc_ops = { 510static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
504 .next = NULL, 511 .next = NULL,
505 .cl_ops = &prio_class_ops, 512 .cl_ops = &prio_class_ops,
506 .id = "rr", 513 .id = "rr",
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9b95fefb70f4..3dcd493f4f4a 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -177,21 +177,21 @@ static void red_destroy(struct Qdisc *sch)
177static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) 177static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
178{ 178{
179 struct Qdisc *q; 179 struct Qdisc *q;
180 struct rtattr *rta; 180 struct nlattr *nla;
181 int ret; 181 int ret;
182 182
183 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, 183 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
184 TC_H_MAKE(sch->handle, 1)); 184 TC_H_MAKE(sch->handle, 1));
185 if (q) { 185 if (q) {
186 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), 186 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
187 GFP_KERNEL); 187 GFP_KERNEL);
188 if (rta) { 188 if (nla) {
189 rta->rta_type = RTM_NEWQDISC; 189 nla->nla_type = RTM_NEWQDISC;
190 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 190 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
191 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 191 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
192 192
193 ret = q->ops->change(q, rta); 193 ret = q->ops->change(q, nla);
194 kfree(rta); 194 kfree(nla);
195 195
196 if (ret == 0) 196 if (ret == 0)
197 return q; 197 return q;
@@ -201,23 +201,31 @@ static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
201 return NULL; 201 return NULL;
202} 202}
203 203
204static int red_change(struct Qdisc *sch, struct rtattr *opt) 204static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
205 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
206 [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
207};
208
209static int red_change(struct Qdisc *sch, struct nlattr *opt)
205{ 210{
206 struct red_sched_data *q = qdisc_priv(sch); 211 struct red_sched_data *q = qdisc_priv(sch);
207 struct rtattr *tb[TCA_RED_MAX]; 212 struct nlattr *tb[TCA_RED_MAX + 1];
208 struct tc_red_qopt *ctl; 213 struct tc_red_qopt *ctl;
209 struct Qdisc *child = NULL; 214 struct Qdisc *child = NULL;
215 int err;
210 216
211 if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) 217 if (opt == NULL)
212 return -EINVAL; 218 return -EINVAL;
213 219
214 if (tb[TCA_RED_PARMS-1] == NULL || 220 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
215 RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || 221 if (err < 0)
216 tb[TCA_RED_STAB-1] == NULL || 222 return err;
217 RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) 223
224 if (tb[TCA_RED_PARMS] == NULL ||
225 tb[TCA_RED_STAB] == NULL)
218 return -EINVAL; 226 return -EINVAL;
219 227
220 ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); 228 ctl = nla_data(tb[TCA_RED_PARMS]);
221 229
222 if (ctl->limit > 0) { 230 if (ctl->limit > 0) {
223 child = red_create_dflt(sch, ctl->limit); 231 child = red_create_dflt(sch, ctl->limit);
@@ -235,7 +243,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
235 243
236 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, 244 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
237 ctl->Plog, ctl->Scell_log, 245 ctl->Plog, ctl->Scell_log,
238 RTA_DATA(tb[TCA_RED_STAB-1])); 246 nla_data(tb[TCA_RED_STAB]));
239 247
240 if (skb_queue_empty(&sch->q)) 248 if (skb_queue_empty(&sch->q))
241 red_end_of_idle_period(&q->parms); 249 red_end_of_idle_period(&q->parms);
@@ -244,7 +252,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
244 return 0; 252 return 0;
245} 253}
246 254
247static int red_init(struct Qdisc* sch, struct rtattr *opt) 255static int red_init(struct Qdisc* sch, struct nlattr *opt)
248{ 256{
249 struct red_sched_data *q = qdisc_priv(sch); 257 struct red_sched_data *q = qdisc_priv(sch);
250 258
@@ -255,7 +263,7 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
255static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 263static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
256{ 264{
257 struct red_sched_data *q = qdisc_priv(sch); 265 struct red_sched_data *q = qdisc_priv(sch);
258 struct rtattr *opts = NULL; 266 struct nlattr *opts = NULL;
259 struct tc_red_qopt opt = { 267 struct tc_red_qopt opt = {
260 .limit = q->limit, 268 .limit = q->limit,
261 .flags = q->flags, 269 .flags = q->flags,
@@ -266,12 +274,14 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
266 .Scell_log = q->parms.Scell_log, 274 .Scell_log = q->parms.Scell_log,
267 }; 275 };
268 276
269 opts = RTA_NEST(skb, TCA_OPTIONS); 277 opts = nla_nest_start(skb, TCA_OPTIONS);
270 RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); 278 if (opts == NULL)
271 return RTA_NEST_END(skb, opts); 279 goto nla_put_failure;
280 NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
281 return nla_nest_end(skb, opts);
272 282
273rtattr_failure: 283nla_put_failure:
274 return RTA_NEST_CANCEL(skb, opts); 284 return nla_nest_cancel(skb, opts);
275} 285}
276 286
277static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 287static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -332,7 +342,7 @@ static void red_put(struct Qdisc *sch, unsigned long arg)
332} 342}
333 343
334static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 344static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
335 struct rtattr **tca, unsigned long *arg) 345 struct nlattr **tca, unsigned long *arg)
336{ 346{
337 return -ENOSYS; 347 return -ENOSYS;
338} 348}
@@ -359,7 +369,7 @@ static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
359 return NULL; 369 return NULL;
360} 370}
361 371
362static struct Qdisc_class_ops red_class_ops = { 372static const struct Qdisc_class_ops red_class_ops = {
363 .graft = red_graft, 373 .graft = red_graft,
364 .leaf = red_leaf, 374 .leaf = red_leaf,
365 .get = red_get, 375 .get = red_get,
@@ -371,7 +381,7 @@ static struct Qdisc_class_ops red_class_ops = {
371 .dump = red_dump_class, 381 .dump = red_dump_class,
372}; 382};
373 383
374static struct Qdisc_ops red_qdisc_ops = { 384static struct Qdisc_ops red_qdisc_ops __read_mostly = {
375 .id = "red", 385 .id = "red",
376 .priv_size = sizeof(struct red_sched_data), 386 .priv_size = sizeof(struct red_sched_data),
377 .cl_ops = &red_class_ops, 387 .cl_ops = &red_class_ops,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b542c875e154..a20e2ef7704b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -95,6 +95,7 @@ struct sfq_sched_data
95 int limit; 95 int limit;
96 96
97/* Variables */ 97/* Variables */
98 struct tcf_proto *filter_list;
98 struct timer_list perturb_timer; 99 struct timer_list perturb_timer;
99 u32 perturbation; 100 u32 perturbation;
100 sfq_index tail; /* Index of current slot in round */ 101 sfq_index tail; /* Index of current slot in round */
@@ -122,7 +123,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
122 { 123 {
123 const struct iphdr *iph = ip_hdr(skb); 124 const struct iphdr *iph = ip_hdr(skb);
124 h = iph->daddr; 125 h = iph->daddr;
125 h2 = iph->saddr^iph->protocol; 126 h2 = iph->saddr ^ iph->protocol;
126 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 127 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
127 (iph->protocol == IPPROTO_TCP || 128 (iph->protocol == IPPROTO_TCP ||
128 iph->protocol == IPPROTO_UDP || 129 iph->protocol == IPPROTO_UDP ||
@@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
137 { 138 {
138 struct ipv6hdr *iph = ipv6_hdr(skb); 139 struct ipv6hdr *iph = ipv6_hdr(skb);
139 h = iph->daddr.s6_addr32[3]; 140 h = iph->daddr.s6_addr32[3];
140 h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; 141 h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
141 if (iph->nexthdr == IPPROTO_TCP || 142 if (iph->nexthdr == IPPROTO_TCP ||
142 iph->nexthdr == IPPROTO_UDP || 143 iph->nexthdr == IPPROTO_UDP ||
143 iph->nexthdr == IPPROTO_UDPLITE || 144 iph->nexthdr == IPPROTO_UDPLITE ||
@@ -148,12 +149,46 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
148 break; 149 break;
149 } 150 }
150 default: 151 default:
151 h = (u32)(unsigned long)skb->dst^skb->protocol; 152 h = (unsigned long)skb->dst ^ skb->protocol;
152 h2 = (u32)(unsigned long)skb->sk; 153 h2 = (unsigned long)skb->sk;
153 } 154 }
155
154 return sfq_fold_hash(q, h, h2); 156 return sfq_fold_hash(q, h, h2);
155} 157}
156 158
159static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
160 int *qerr)
161{
162 struct sfq_sched_data *q = qdisc_priv(sch);
163 struct tcf_result res;
164 int result;
165
166 if (TC_H_MAJ(skb->priority) == sch->handle &&
167 TC_H_MIN(skb->priority) > 0 &&
168 TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
169 return TC_H_MIN(skb->priority);
170
171 if (!q->filter_list)
172 return sfq_hash(q, skb) + 1;
173
174 *qerr = NET_XMIT_BYPASS;
175 result = tc_classify(skb, q->filter_list, &res);
176 if (result >= 0) {
177#ifdef CONFIG_NET_CLS_ACT
178 switch (result) {
179 case TC_ACT_STOLEN:
180 case TC_ACT_QUEUED:
181 *qerr = NET_XMIT_SUCCESS;
182 case TC_ACT_SHOT:
183 return 0;
184 }
185#endif
186 if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
187 return TC_H_MIN(res.classid);
188 }
189 return 0;
190}
191
157static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 192static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
158{ 193{
159 sfq_index p, n; 194 sfq_index p, n;
@@ -208,7 +243,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
208 drop a packet from it */ 243 drop a packet from it */
209 244
210 if (d > 1) { 245 if (d > 1) {
211 sfq_index x = q->dep[d+SFQ_DEPTH].next; 246 sfq_index x = q->dep[d + SFQ_DEPTH].next;
212 skb = q->qs[x].prev; 247 skb = q->qs[x].prev;
213 len = skb->len; 248 len = skb->len;
214 __skb_unlink(skb, &q->qs[x]); 249 __skb_unlink(skb, &q->qs[x]);
@@ -241,17 +276,28 @@ static unsigned int sfq_drop(struct Qdisc *sch)
241} 276}
242 277
243static int 278static int
244sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) 279sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
245{ 280{
246 struct sfq_sched_data *q = qdisc_priv(sch); 281 struct sfq_sched_data *q = qdisc_priv(sch);
247 unsigned hash = sfq_hash(q, skb); 282 unsigned int hash;
248 sfq_index x; 283 sfq_index x;
284 int ret;
285
286 hash = sfq_classify(skb, sch, &ret);
287 if (hash == 0) {
288 if (ret == NET_XMIT_BYPASS)
289 sch->qstats.drops++;
290 kfree_skb(skb);
291 return ret;
292 }
293 hash--;
249 294
250 x = q->ht[hash]; 295 x = q->ht[hash];
251 if (x == SFQ_DEPTH) { 296 if (x == SFQ_DEPTH) {
252 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 297 q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
253 q->hash[x] = hash; 298 q->hash[x] = hash;
254 } 299 }
300
255 /* If selected queue has length q->limit, this means that 301 /* If selected queue has length q->limit, this means that
256 * all another queues are empty and that we do simple tail drop, 302 * all another queues are empty and that we do simple tail drop,
257 * i.e. drop _this_ packet. 303 * i.e. drop _this_ packet.
@@ -284,17 +330,28 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
284} 330}
285 331
286static int 332static int
287sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) 333sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
288{ 334{
289 struct sfq_sched_data *q = qdisc_priv(sch); 335 struct sfq_sched_data *q = qdisc_priv(sch);
290 unsigned hash = sfq_hash(q, skb); 336 unsigned int hash;
291 sfq_index x; 337 sfq_index x;
338 int ret;
339
340 hash = sfq_classify(skb, sch, &ret);
341 if (hash == 0) {
342 if (ret == NET_XMIT_BYPASS)
343 sch->qstats.drops++;
344 kfree_skb(skb);
345 return ret;
346 }
347 hash--;
292 348
293 x = q->ht[hash]; 349 x = q->ht[hash];
294 if (x == SFQ_DEPTH) { 350 if (x == SFQ_DEPTH) {
295 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 351 q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
296 q->hash[x] = hash; 352 q->hash[x] = hash;
297 } 353 }
354
298 sch->qstats.backlog += skb->len; 355 sch->qstats.backlog += skb->len;
299 __skb_queue_head(&q->qs[x], skb); 356 __skb_queue_head(&q->qs[x], skb);
300 /* If selected queue has length q->limit+1, this means that 357 /* If selected queue has length q->limit+1, this means that
@@ -310,6 +367,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
310 kfree_skb(skb); 367 kfree_skb(skb);
311 return NET_XMIT_CN; 368 return NET_XMIT_CN;
312 } 369 }
370
313 sfq_inc(q, x); 371 sfq_inc(q, x);
314 if (q->qs[x].qlen == 1) { /* The flow is new */ 372 if (q->qs[x].qlen == 1) { /* The flow is new */
315 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 373 if (q->tail == SFQ_DEPTH) { /* It is the first flow */
@@ -322,6 +380,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
322 q->tail = x; 380 q->tail = x;
323 } 381 }
324 } 382 }
383
325 if (++sch->q.qlen <= q->limit) { 384 if (++sch->q.qlen <= q->limit) {
326 sch->qstats.requeues++; 385 sch->qstats.requeues++;
327 return 0; 386 return 0;
@@ -336,7 +395,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
336 395
337 396
338static struct sk_buff * 397static struct sk_buff *
339sfq_dequeue(struct Qdisc* sch) 398sfq_dequeue(struct Qdisc *sch)
340{ 399{
341 struct sfq_sched_data *q = qdisc_priv(sch); 400 struct sfq_sched_data *q = qdisc_priv(sch);
342 struct sk_buff *skb; 401 struct sk_buff *skb;
@@ -373,7 +432,7 @@ sfq_dequeue(struct Qdisc* sch)
373} 432}
374 433
375static void 434static void
376sfq_reset(struct Qdisc* sch) 435sfq_reset(struct Qdisc *sch)
377{ 436{
378 struct sk_buff *skb; 437 struct sk_buff *skb;
379 438
@@ -383,27 +442,27 @@ sfq_reset(struct Qdisc* sch)
383 442
384static void sfq_perturbation(unsigned long arg) 443static void sfq_perturbation(unsigned long arg)
385{ 444{
386 struct Qdisc *sch = (struct Qdisc*)arg; 445 struct Qdisc *sch = (struct Qdisc *)arg;
387 struct sfq_sched_data *q = qdisc_priv(sch); 446 struct sfq_sched_data *q = qdisc_priv(sch);
388 447
389 get_random_bytes(&q->perturbation, 4); 448 q->perturbation = net_random();
390 449
391 if (q->perturb_period) 450 if (q->perturb_period)
392 mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 451 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
393} 452}
394 453
395static int sfq_change(struct Qdisc *sch, struct rtattr *opt) 454static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
396{ 455{
397 struct sfq_sched_data *q = qdisc_priv(sch); 456 struct sfq_sched_data *q = qdisc_priv(sch);
398 struct tc_sfq_qopt *ctl = RTA_DATA(opt); 457 struct tc_sfq_qopt *ctl = nla_data(opt);
399 unsigned int qlen; 458 unsigned int qlen;
400 459
401 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 460 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
402 return -EINVAL; 461 return -EINVAL;
403 462
404 sch_tree_lock(sch); 463 sch_tree_lock(sch);
405 q->quantum = ctl->quantum ? : psched_mtu(sch->dev); 464 q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
406 q->perturb_period = ctl->perturb_period*HZ; 465 q->perturb_period = ctl->perturb_period * HZ;
407 if (ctl->limit) 466 if (ctl->limit)
408 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 467 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
409 468
@@ -415,41 +474,44 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
415 del_timer(&q->perturb_timer); 474 del_timer(&q->perturb_timer);
416 if (q->perturb_period) { 475 if (q->perturb_period) {
417 mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 476 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
418 get_random_bytes(&q->perturbation, 4); 477 q->perturbation = net_random();
419 } 478 }
420 sch_tree_unlock(sch); 479 sch_tree_unlock(sch);
421 return 0; 480 return 0;
422} 481}
423 482
424static int sfq_init(struct Qdisc *sch, struct rtattr *opt) 483static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
425{ 484{
426 struct sfq_sched_data *q = qdisc_priv(sch); 485 struct sfq_sched_data *q = qdisc_priv(sch);
427 int i; 486 int i;
428 487
429 init_timer(&q->perturb_timer);
430 q->perturb_timer.data = (unsigned long)sch;
431 q->perturb_timer.function = sfq_perturbation; 488 q->perturb_timer.function = sfq_perturbation;
489 q->perturb_timer.data = (unsigned long)sch;;
490 init_timer_deferrable(&q->perturb_timer);
432 491
433 for (i=0; i<SFQ_HASH_DIVISOR; i++) 492 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
434 q->ht[i] = SFQ_DEPTH; 493 q->ht[i] = SFQ_DEPTH;
435 for (i=0; i<SFQ_DEPTH; i++) { 494
495 for (i = 0; i < SFQ_DEPTH; i++) {
436 skb_queue_head_init(&q->qs[i]); 496 skb_queue_head_init(&q->qs[i]);
437 q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; 497 q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
438 q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; 498 q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
439 } 499 }
500
440 q->limit = SFQ_DEPTH - 1; 501 q->limit = SFQ_DEPTH - 1;
441 q->max_depth = 0; 502 q->max_depth = 0;
442 q->tail = SFQ_DEPTH; 503 q->tail = SFQ_DEPTH;
443 if (opt == NULL) { 504 if (opt == NULL) {
444 q->quantum = psched_mtu(sch->dev); 505 q->quantum = psched_mtu(sch->dev);
445 q->perturb_period = 0; 506 q->perturb_period = 0;
446 get_random_bytes(&q->perturbation, 4); 507 q->perturbation = net_random();
447 } else { 508 } else {
448 int err = sfq_change(sch, opt); 509 int err = sfq_change(sch, opt);
449 if (err) 510 if (err)
450 return err; 511 return err;
451 } 512 }
452 for (i=0; i<SFQ_DEPTH; i++) 513
514 for (i = 0; i < SFQ_DEPTH; i++)
453 sfq_link(q, i); 515 sfq_link(q, i);
454 return 0; 516 return 0;
455} 517}
@@ -457,6 +519,8 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
457static void sfq_destroy(struct Qdisc *sch) 519static void sfq_destroy(struct Qdisc *sch)
458{ 520{
459 struct sfq_sched_data *q = qdisc_priv(sch); 521 struct sfq_sched_data *q = qdisc_priv(sch);
522
523 tcf_destroy_chain(q->filter_list);
460 del_timer(&q->perturb_timer); 524 del_timer(&q->perturb_timer);
461} 525}
462 526
@@ -467,24 +531,94 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
467 struct tc_sfq_qopt opt; 531 struct tc_sfq_qopt opt;
468 532
469 opt.quantum = q->quantum; 533 opt.quantum = q->quantum;
470 opt.perturb_period = q->perturb_period/HZ; 534 opt.perturb_period = q->perturb_period / HZ;
471 535
472 opt.limit = q->limit; 536 opt.limit = q->limit;
473 opt.divisor = SFQ_HASH_DIVISOR; 537 opt.divisor = SFQ_HASH_DIVISOR;
474 opt.flows = q->limit; 538 opt.flows = q->limit;
475 539
476 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 540 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
477 541
478 return skb->len; 542 return skb->len;
479 543
480rtattr_failure: 544nla_put_failure:
481 nlmsg_trim(skb, b); 545 nlmsg_trim(skb, b);
482 return -1; 546 return -1;
483} 547}
484 548
485static struct Qdisc_ops sfq_qdisc_ops = { 549static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
486 .next = NULL, 550 struct nlattr **tca, unsigned long *arg)
487 .cl_ops = NULL, 551{
552 return -EOPNOTSUPP;
553}
554
555static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
556{
557 return 0;
558}
559
560static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
561{
562 struct sfq_sched_data *q = qdisc_priv(sch);
563
564 if (cl)
565 return NULL;
566 return &q->filter_list;
567}
568
569static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
570 struct sk_buff *skb, struct tcmsg *tcm)
571{
572 tcm->tcm_handle |= TC_H_MIN(cl);
573 return 0;
574}
575
576static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
577 struct gnet_dump *d)
578{
579 struct sfq_sched_data *q = qdisc_priv(sch);
580 sfq_index idx = q->ht[cl-1];
581 struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
582 struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
583
584 if (gnet_stats_copy_queue(d, &qs) < 0)
585 return -1;
586 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
587}
588
589static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
590{
591 struct sfq_sched_data *q = qdisc_priv(sch);
592 unsigned int i;
593
594 if (arg->stop)
595 return;
596
597 for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
598 if (q->ht[i] == SFQ_DEPTH ||
599 arg->count < arg->skip) {
600 arg->count++;
601 continue;
602 }
603 if (arg->fn(sch, i + 1, arg) < 0) {
604 arg->stop = 1;
605 break;
606 }
607 arg->count++;
608 }
609}
610
611static const struct Qdisc_class_ops sfq_class_ops = {
612 .get = sfq_get,
613 .change = sfq_change_class,
614 .tcf_chain = sfq_find_tcf,
615 .dump = sfq_dump_class,
616 .dump_stats = sfq_dump_class_stats,
617 .walk = sfq_walk,
618};
619
620static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
621 .cl_ops = &sfq_class_ops,
488 .id = "sfq", 622 .id = "sfq",
489 .priv_size = sizeof(struct sfq_sched_data), 623 .priv_size = sizeof(struct sfq_sched_data),
490 .enqueue = sfq_enqueue, 624 .enqueue = sfq_enqueue,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b0d81098b0ee..0b7d78f59d8c 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -245,20 +245,21 @@ static void tbf_reset(struct Qdisc* sch)
245static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) 245static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
246{ 246{
247 struct Qdisc *q; 247 struct Qdisc *q;
248 struct rtattr *rta; 248 struct nlattr *nla;
249 int ret; 249 int ret;
250 250
251 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, 251 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
252 TC_H_MAKE(sch->handle, 1)); 252 TC_H_MAKE(sch->handle, 1));
253 if (q) { 253 if (q) {
254 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 254 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
255 if (rta) { 255 GFP_KERNEL);
256 rta->rta_type = RTM_NEWQDISC; 256 if (nla) {
257 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 257 nla->nla_type = RTM_NEWQDISC;
258 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 258 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
259 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
259 260
260 ret = q->ops->change(q, rta); 261 ret = q->ops->change(q, nla);
261 kfree(rta); 262 kfree(nla);
262 263
263 if (ret == 0) 264 if (ret == 0)
264 return q; 265 return q;
@@ -269,30 +270,39 @@ static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
269 return NULL; 270 return NULL;
270} 271}
271 272
272static int tbf_change(struct Qdisc* sch, struct rtattr *opt) 273static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
274 [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
275 [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
276 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
277};
278
279static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
273{ 280{
274 int err = -EINVAL; 281 int err;
275 struct tbf_sched_data *q = qdisc_priv(sch); 282 struct tbf_sched_data *q = qdisc_priv(sch);
276 struct rtattr *tb[TCA_TBF_PTAB]; 283 struct nlattr *tb[TCA_TBF_PTAB + 1];
277 struct tc_tbf_qopt *qopt; 284 struct tc_tbf_qopt *qopt;
278 struct qdisc_rate_table *rtab = NULL; 285 struct qdisc_rate_table *rtab = NULL;
279 struct qdisc_rate_table *ptab = NULL; 286 struct qdisc_rate_table *ptab = NULL;
280 struct Qdisc *child = NULL; 287 struct Qdisc *child = NULL;
281 int max_size,n; 288 int max_size,n;
282 289
283 if (rtattr_parse_nested(tb, TCA_TBF_PTAB, opt) || 290 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
284 tb[TCA_TBF_PARMS-1] == NULL || 291 if (err < 0)
285 RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) 292 return err;
293
294 err = -EINVAL;
295 if (tb[TCA_TBF_PARMS] == NULL)
286 goto done; 296 goto done;
287 297
288 qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]); 298 qopt = nla_data(tb[TCA_TBF_PARMS]);
289 rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]); 299 rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
290 if (rtab == NULL) 300 if (rtab == NULL)
291 goto done; 301 goto done;
292 302
293 if (qopt->peakrate.rate) { 303 if (qopt->peakrate.rate) {
294 if (qopt->peakrate.rate > qopt->rate.rate) 304 if (qopt->peakrate.rate > qopt->rate.rate)
295 ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]); 305 ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
296 if (ptab == NULL) 306 if (ptab == NULL)
297 goto done; 307 goto done;
298 } 308 }
@@ -339,7 +349,7 @@ done:
339 return err; 349 return err;
340} 350}
341 351
342static int tbf_init(struct Qdisc* sch, struct rtattr *opt) 352static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
343{ 353{
344 struct tbf_sched_data *q = qdisc_priv(sch); 354 struct tbf_sched_data *q = qdisc_priv(sch);
345 355
@@ -370,12 +380,12 @@ static void tbf_destroy(struct Qdisc *sch)
370static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) 380static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
371{ 381{
372 struct tbf_sched_data *q = qdisc_priv(sch); 382 struct tbf_sched_data *q = qdisc_priv(sch);
373 unsigned char *b = skb_tail_pointer(skb); 383 struct nlattr *nest;
374 struct rtattr *rta;
375 struct tc_tbf_qopt opt; 384 struct tc_tbf_qopt opt;
376 385
377 rta = (struct rtattr*)b; 386 nest = nla_nest_start(skb, TCA_OPTIONS);
378 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 387 if (nest == NULL)
388 goto nla_put_failure;
379 389
380 opt.limit = q->limit; 390 opt.limit = q->limit;
381 opt.rate = q->R_tab->rate; 391 opt.rate = q->R_tab->rate;
@@ -385,13 +395,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
385 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 395 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
386 opt.mtu = q->mtu; 396 opt.mtu = q->mtu;
387 opt.buffer = q->buffer; 397 opt.buffer = q->buffer;
388 RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); 398 NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
389 rta->rta_len = skb_tail_pointer(skb) - b;
390 399
400 nla_nest_end(skb, nest);
391 return skb->len; 401 return skb->len;
392 402
393rtattr_failure: 403nla_put_failure:
394 nlmsg_trim(skb, b); 404 nla_nest_cancel(skb, nest);
395 return -1; 405 return -1;
396} 406}
397 407
@@ -442,7 +452,7 @@ static void tbf_put(struct Qdisc *sch, unsigned long arg)
442} 452}
443 453
444static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 454static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
445 struct rtattr **tca, unsigned long *arg) 455 struct nlattr **tca, unsigned long *arg)
446{ 456{
447 return -ENOSYS; 457 return -ENOSYS;
448} 458}
@@ -469,7 +479,7 @@ static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
469 return NULL; 479 return NULL;
470} 480}
471 481
472static struct Qdisc_class_ops tbf_class_ops = 482static const struct Qdisc_class_ops tbf_class_ops =
473{ 483{
474 .graft = tbf_graft, 484 .graft = tbf_graft,
475 .leaf = tbf_leaf, 485 .leaf = tbf_leaf,
@@ -482,7 +492,7 @@ static struct Qdisc_class_ops tbf_class_ops =
482 .dump = tbf_dump_class, 492 .dump = tbf_dump_class,
483}; 493};
484 494
485static struct Qdisc_ops tbf_qdisc_ops = { 495static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
486 .next = NULL, 496 .next = NULL,
487 .cl_ops = &tbf_class_ops, 497 .cl_ops = &tbf_class_ops,
488 .id = "tbf", 498 .id = "tbf",
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 421281d9dd1d..0444fd0f0d22 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -71,7 +71,7 @@ struct teql_sched_data
71 71
72#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) 72#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
73 73
74#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST) 74#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
75 75
76/* "teql*" qdisc routines */ 76/* "teql*" qdisc routines */
77 77
@@ -168,7 +168,7 @@ teql_destroy(struct Qdisc* sch)
168 } 168 }
169} 169}
170 170
171static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) 171static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
172{ 172{
173 struct net_device *dev = sch->dev; 173 struct net_device *dev = sch->dev;
174 struct teql_master *m = (struct teql_master*)sch->ops; 174 struct teql_master *m = (struct teql_master*)sch->ops;
@@ -252,6 +252,9 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
252static inline int teql_resolve(struct sk_buff *skb, 252static inline int teql_resolve(struct sk_buff *skb,
253 struct sk_buff *skb_res, struct net_device *dev) 253 struct sk_buff *skb_res, struct net_device *dev)
254{ 254{
255 if (dev->qdisc == &noop_qdisc)
256 return -ENODEV;
257
255 if (dev->header_ops == NULL || 258 if (dev->header_ops == NULL ||
256 skb->dst == NULL || 259 skb->dst == NULL ||
257 skb->dst->neighbour == NULL) 260 skb->dst->neighbour == NULL)
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 8210f549c492..0b79f869c4ea 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -6,10 +6,11 @@ menuconfig IP_SCTP
6 tristate "The SCTP Protocol (EXPERIMENTAL)" 6 tristate "The SCTP Protocol (EXPERIMENTAL)"
7 depends on INET && EXPERIMENTAL 7 depends on INET && EXPERIMENTAL
8 depends on IPV6 || IPV6=n 8 depends on IPV6 || IPV6=n
9 select CRYPTO if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 9 select CRYPTO
10 select CRYPTO_HMAC if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 10 select CRYPTO_HMAC
11 select CRYPTO_SHA1 if SCTP_HMAC_SHA1 11 select CRYPTO_SHA1
12 select CRYPTO_MD5 if SCTP_HMAC_MD5 12 select CRYPTO_MD5 if SCTP_HMAC_MD5
13 select LIBCRC32C
13 ---help--- 14 ---help---
14 Stream Control Transmission Protocol 15 Stream Control Transmission Protocol
15 16
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1da7204d9b42..f5356b9d5ee3 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -9,7 +9,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
9 transport.o chunk.o sm_make_chunk.o ulpevent.o \ 9 transport.o chunk.o sm_make_chunk.o ulpevent.o \
10 inqueue.o outqueue.o ulpqueue.o command.o \ 10 inqueue.o outqueue.o ulpqueue.o command.o \
11 tsnmap.o bind_addr.o socket.o primitive.o \ 11 tsnmap.o bind_addr.o socket.o primitive.o \
12 output.o input.o debug.o ssnmap.o proc.o crc32c.o \ 12 output.o input.o debug.o ssnmap.o proc.o \
13 auth.o 13 auth.o
14 14
15sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o 15sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 03158e3665da..a016e78061f4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -61,6 +61,7 @@
61 61
62/* Forward declarations for internal functions. */ 62/* Forward declarations for internal functions. */
63static void sctp_assoc_bh_rcv(struct work_struct *work); 63static void sctp_assoc_bh_rcv(struct work_struct *work);
64static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
64 65
65 66
66/* 1st Level Abstractions. */ 67/* 1st Level Abstractions. */
@@ -167,11 +168,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
167 sp->autoclose * HZ; 168 sp->autoclose * HZ;
168 169
169 /* Initilizes the timers */ 170 /* Initilizes the timers */
170 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { 171 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
171 init_timer(&asoc->timers[i]); 172 setup_timer(&asoc->timers[i], sctp_timer_events[i],
172 asoc->timers[i].function = sctp_timer_events[i]; 173 (unsigned long)asoc);
173 asoc->timers[i].data = (unsigned long) asoc;
174 }
175 174
176 /* Pull default initialization values from the sock options. 175 /* Pull default initialization values from the sock options.
177 * Note: This assumes that the values have already been 176 * Note: This assumes that the values have already been
@@ -244,6 +243,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
244 asoc->addip_serial = asoc->c.initial_tsn; 243 asoc->addip_serial = asoc->c.initial_tsn;
245 244
246 INIT_LIST_HEAD(&asoc->addip_chunk_list); 245 INIT_LIST_HEAD(&asoc->addip_chunk_list);
246 INIT_LIST_HEAD(&asoc->asconf_ack_list);
247 247
248 /* Make an empty list of remote transport addresses. */ 248 /* Make an empty list of remote transport addresses. */
249 INIT_LIST_HEAD(&asoc->peer.transport_addr_list); 249 INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
@@ -262,10 +262,14 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
262 */ 262 */
263 asoc->peer.sack_needed = 1; 263 asoc->peer.sack_needed = 1;
264 264
265 /* Assume that the peer recongizes ASCONF until reported otherwise 265 /* Assume that the peer will tell us if he recognizes ASCONF
266 * via an ERROR chunk. 266 * as part of INIT exchange.
267 * The sctp_addip_noauth option is there for backward compatibilty
268 * and will revert old behavior.
267 */ 269 */
268 asoc->peer.asconf_capable = 1; 270 asoc->peer.asconf_capable = 0;
271 if (sctp_addip_noauth)
272 asoc->peer.asconf_capable = 1;
269 273
270 /* Create an input queue. */ 274 /* Create an input queue. */
271 sctp_inq_init(&asoc->base.inqueue); 275 sctp_inq_init(&asoc->base.inqueue);
@@ -429,8 +433,7 @@ void sctp_association_free(struct sctp_association *asoc)
429 asoc->peer.transport_count = 0; 433 asoc->peer.transport_count = 0;
430 434
431 /* Free any cached ASCONF_ACK chunk. */ 435 /* Free any cached ASCONF_ACK chunk. */
432 if (asoc->addip_last_asconf_ack) 436 sctp_assoc_free_asconf_acks(asoc);
433 sctp_chunk_free(asoc->addip_last_asconf_ack);
434 437
435 /* Free any cached ASCONF chunk. */ 438 /* Free any cached ASCONF chunk. */
436 if (asoc->addip_last_asconf) 439 if (asoc->addip_last_asconf)
@@ -728,6 +731,23 @@ struct sctp_transport *sctp_assoc_lookup_paddr(
728 return NULL; 731 return NULL;
729} 732}
730 733
734/* Remove all transports except a give one */
735void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
736 struct sctp_transport *primary)
737{
738 struct sctp_transport *temp;
739 struct sctp_transport *t;
740
741 list_for_each_entry_safe(t, temp, &asoc->peer.transport_addr_list,
742 transports) {
743 /* if the current transport is not the primary one, delete it */
744 if (t != primary)
745 sctp_assoc_rm_peer(asoc, t);
746 }
747
748 return;
749}
750
731/* Engage in transport control operations. 751/* Engage in transport control operations.
732 * Mark the transport up or down and send a notification to the user. 752 * Mark the transport up or down and send a notification to the user.
733 * Select and update the new active and retran paths. 753 * Select and update the new active and retran paths.
@@ -1466,3 +1486,56 @@ retry:
1466 asoc->assoc_id = (sctp_assoc_t) assoc_id; 1486 asoc->assoc_id = (sctp_assoc_t) assoc_id;
1467 return error; 1487 return error;
1468} 1488}
1489
1490/* Free asconf_ack cache */
1491static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc)
1492{
1493 struct sctp_chunk *ack;
1494 struct sctp_chunk *tmp;
1495
1496 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
1497 transmitted_list) {
1498 list_del_init(&ack->transmitted_list);
1499 sctp_chunk_free(ack);
1500 }
1501}
1502
1503/* Clean up the ASCONF_ACK queue */
1504void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc)
1505{
1506 struct sctp_chunk *ack;
1507 struct sctp_chunk *tmp;
1508
1509 /* We can remove all the entries from the queue upto
1510 * the "Peer-Sequence-Number".
1511 */
1512 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
1513 transmitted_list) {
1514 if (ack->subh.addip_hdr->serial ==
1515 htonl(asoc->peer.addip_serial))
1516 break;
1517
1518 list_del_init(&ack->transmitted_list);
1519 sctp_chunk_free(ack);
1520 }
1521}
1522
1523/* Find the ASCONF_ACK whose serial number matches ASCONF */
1524struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
1525 const struct sctp_association *asoc,
1526 __be32 serial)
1527{
1528 struct sctp_chunk *ack = NULL;
1529
1530 /* Walk through the list of cached ASCONF-ACKs and find the
1531 * ack chunk whose serial number matches that of the request.
1532 */
1533 list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
1534 if (ack->subh.addip_hdr->serial == serial) {
1535 sctp_chunk_hold(ack);
1536 break;
1537 }
1538 }
1539
1540 return ack;
1541}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 8af1004abefe..97e6ebd14500 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -54,11 +54,13 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = {
54 /* id 2 is reserved as well */ 54 /* id 2 is reserved as well */
55 .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, 55 .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2,
56 }, 56 },
57#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE)
57 { 58 {
58 .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, 59 .hmac_id = SCTP_AUTH_HMAC_ID_SHA256,
59 .hmac_name="hmac(sha256)", 60 .hmac_name="hmac(sha256)",
60 .hmac_len = SCTP_SHA256_SIG_SIZE, 61 .hmac_len = SCTP_SHA256_SIG_SIZE,
61 } 62 }
63#endif
62}; 64};
63 65
64 66
@@ -556,7 +558,7 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
556 return &sctp_hmac_list[id]; 558 return &sctp_hmac_list[id];
557} 559}
558 560
559static int __sctp_auth_find_hmacid(__u16 *hmacs, int n_elts, __u16 hmac_id) 561static int __sctp_auth_find_hmacid(__be16 *hmacs, int n_elts, __be16 hmac_id)
560{ 562{
561 int found = 0; 563 int found = 0;
562 int i; 564 int i;
@@ -573,7 +575,7 @@ static int __sctp_auth_find_hmacid(__u16 *hmacs, int n_elts, __u16 hmac_id)
573 575
574/* See if the HMAC_ID is one that we claim as supported */ 576/* See if the HMAC_ID is one that we claim as supported */
575int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, 577int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
576 __u16 hmac_id) 578 __be16 hmac_id)
577{ 579{
578 struct sctp_hmac_algo_param *hmacs; 580 struct sctp_hmac_algo_param *hmacs;
579 __u16 n_elt; 581 __u16 n_elt;
@@ -631,7 +633,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
631 int found = 0; 633 int found = 0;
632 int i; 634 int i;
633 635
634 if (!param) 636 if (!param || param->param_hdr.length == 0)
635 return 0; 637 return 0;
636 638
637 len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t); 639 len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index dfffa94fb9f6..13fbfb449a55 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -105,6 +105,32 @@ out:
105 return error; 105 return error;
106} 106}
107 107
108/* Exactly duplicate the address lists. This is necessary when doing
109 * peer-offs and accepts. We don't want to put all the current system
110 * addresses into the endpoint. That's useless. But we do want duplicat
111 * the list of bound addresses that the older endpoint used.
112 */
113int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
114 const struct sctp_bind_addr *src,
115 gfp_t gfp)
116{
117 struct sctp_sockaddr_entry *addr;
118 struct list_head *pos;
119 int error = 0;
120
121 /* All addresses share the same port. */
122 dest->port = src->port;
123
124 list_for_each(pos, &src->address_list) {
125 addr = list_entry(pos, struct sctp_sockaddr_entry, list);
126 error = sctp_add_bind_addr(dest, &addr->a, 1, gfp);
127 if (error < 0)
128 break;
129 }
130
131 return error;
132}
133
108/* Initialize the SCTP_bind_addr structure for either an endpoint or 134/* Initialize the SCTP_bind_addr structure for either an endpoint or
109 * an association. 135 * an association.
110 */ 136 */
@@ -145,7 +171,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
145 171
146/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 172/* Add an address to the bind address list in the SCTP_bind_addr structure. */
147int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 173int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
148 __u8 use_as_src, gfp_t gfp) 174 __u8 addr_state, gfp_t gfp)
149{ 175{
150 struct sctp_sockaddr_entry *addr; 176 struct sctp_sockaddr_entry *addr;
151 177
@@ -162,7 +188,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
162 if (!addr->a.v4.sin_port) 188 if (!addr->a.v4.sin_port)
163 addr->a.v4.sin_port = htons(bp->port); 189 addr->a.v4.sin_port = htons(bp->port);
164 190
165 addr->use_as_src = use_as_src; 191 addr->state = addr_state;
166 addr->valid = 1; 192 addr->valid = 1;
167 193
168 INIT_LIST_HEAD(&addr->list); 194 INIT_LIST_HEAD(&addr->list);
@@ -180,9 +206,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
180/* Delete an address from the bind address list in the SCTP_bind_addr 206/* Delete an address from the bind address list in the SCTP_bind_addr
181 * structure. 207 * structure.
182 */ 208 */
183int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, 209int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
184 void fastcall (*rcu_call)(struct rcu_head *head,
185 void (*func)(struct rcu_head *head)))
186{ 210{
187 struct sctp_sockaddr_entry *addr, *temp; 211 struct sctp_sockaddr_entry *addr, *temp;
188 212
@@ -198,15 +222,10 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
198 } 222 }
199 } 223 }
200 224
201 /* Call the rcu callback provided in the args. This function is
202 * called by both BH packet processing and user side socket option
203 * processing, but it works on different lists in those 2 contexts.
204 * Each context provides it's own callback, whether call_rcu_bh()
205 * or call_rcu(), to make sure that we wait for an appropriate time.
206 */
207 if (addr && !addr->valid) { 225 if (addr && !addr->valid) {
208 rcu_call(&addr->rcu, sctp_local_addr_free); 226 call_rcu(&addr->rcu, sctp_local_addr_free);
209 SCTP_DBG_OBJCNT_DEC(addr); 227 SCTP_DBG_OBJCNT_DEC(addr);
228 return 0;
210 } 229 }
211 230
212 return -EINVAL; 231 return -EINVAL;
@@ -293,7 +312,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
293 } 312 }
294 313
295 af->from_addr_param(&addr, rawaddr, htons(port), 0); 314 af->from_addr_param(&addr, rawaddr, htons(port), 0);
296 retval = sctp_add_bind_addr(bp, &addr, 1, gfp); 315 retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp);
297 if (retval) { 316 if (retval) {
298 /* Can't finish building the list, clean up. */ 317 /* Can't finish building the list, clean up. */
299 sctp_bind_addr_clean(bp); 318 sctp_bind_addr_clean(bp);
@@ -334,6 +353,32 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
334 return match; 353 return match;
335} 354}
336 355
356/* Get the state of the entry in the bind_addr_list */
357int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
358 const union sctp_addr *addr)
359{
360 struct sctp_sockaddr_entry *laddr;
361 struct sctp_af *af;
362 int state = -1;
363
364 af = sctp_get_af_specific(addr->sa.sa_family);
365 if (unlikely(!af))
366 return state;
367
368 rcu_read_lock();
369 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
370 if (!laddr->valid)
371 continue;
372 if (af->cmp_addr(&laddr->a, addr)) {
373 state = laddr->state;
374 break;
375 }
376 }
377 rcu_read_unlock();
378
379 return state;
380}
381
337/* Find the first address in the bind address list that is not present in 382/* Find the first address in the bind address list that is not present in
338 * the addrs packed array. 383 * the addrs packed array.
339 */ 384 */
@@ -392,7 +437,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
392 (((AF_INET6 == addr->sa.sa_family) && 437 (((AF_INET6 == addr->sa.sa_family) &&
393 (flags & SCTP_ADDR6_ALLOWED) && 438 (flags & SCTP_ADDR6_ALLOWED) &&
394 (flags & SCTP_ADDR6_PEERSUPP)))) 439 (flags & SCTP_ADDR6_PEERSUPP))))
395 error = sctp_add_bind_addr(dest, addr, 1, gfp); 440 error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC,
441 gfp);
396 } 442 }
397 443
398 return error; 444 return error;
diff --git a/net/sctp/crc32c.c b/net/sctp/crc32c.c
deleted file mode 100644
index 181edabdb8ca..000000000000
--- a/net/sctp/crc32c.c
+++ /dev/null
@@ -1,222 +0,0 @@
1/* SCTP kernel reference Implementation
2 * Copyright (c) 1999-2001 Motorola, Inc.
3 * Copyright (c) 2001-2003 International Business Machines, Corp.
4 *
5 * This file is part of the SCTP kernel reference Implementation
6 *
7 * SCTP Checksum functions
8 *
9 * The SCTP reference implementation is free software;
10 * you can redistribute it and/or modify it under the terms of
11 * the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * The SCTP reference implementation is distributed in the hope that it
16 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
17 * ************************
18 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19 * See the GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with GNU CC; see the file COPYING. If not, write to
23 * the Free Software Foundation, 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
25 *
26 * Please send any bug reports or fixes you make to the
27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 *
33 * Written or modified by:
34 * Dinakaran Joseph
35 * Jon Grimm <jgrimm@us.ibm.com>
36 * Sridhar Samudrala <sri@us.ibm.com>
37 *
38 * Any bugs reported given to us we will try to fix... any fixes shared will
39 * be incorporated into the next SCTP release.
40 */
41
42/* The following code has been taken directly from
43 * draft-ietf-tsvwg-sctpcsum-03.txt
44 *
45 * The code has now been modified specifically for SCTP knowledge.
46 */
47
48#include <linux/types.h>
49#include <net/sctp/sctp.h>
50
51#define CRC32C_POLY 0x1EDC6F41
52#define CRC32C(c,d) (c=(c>>8)^crc_c[(c^(d))&0xFF])
53/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
54/* Copyright 2001, D. Otis. Use this program, code or tables */
55/* extracted from it, as desired without restriction. */
56/* */
57/* 32 Bit Reflected CRC table generation for SCTP. */
58/* To accommodate serial byte data being shifted out least */
59/* significant bit first, the table's 32 bit words are reflected */
60/* which flips both byte and bit MS and LS positions. The CRC */
61/* is calculated MS bits first from the perspective of the serial*/
62/* stream. The x^32 term is implied and the x^0 term may also */
63/* be shown as +1. The polynomial code used is 0x1EDC6F41. */
64/* Castagnoli93 */
65/* x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+ */
66/* x^11+x^10+x^9+x^8+x^6+x^0 */
67/* Guy Castagnoli Stefan Braeuer and Martin Herrman */
68/* "Optimization of Cyclic Redundancy-Check Codes */
69/* with 24 and 32 Parity Bits", */
70/* IEEE Transactions on Communications, Vol.41, No.6, June 1993 */
71/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
72static const __u32 crc_c[256] = {
73 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
74 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
75 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
76 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
77 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
78 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
79 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
80 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
81 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
82 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
83 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
84 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
85 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
86 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
87 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
88 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
89 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
90 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
91 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
92 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
93 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
94 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
95 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
96 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
97 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
98 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
99 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
100 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
101 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
102 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
103 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
104 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
105 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
106 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
107 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
108 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
109 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
110 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
111 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
112 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
113 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
114 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
115 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
116 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
117 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
118 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
119 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
120 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
121 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
122 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
123 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
124 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
125 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
126 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
127 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
128 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
129 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
130 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
131 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
132 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
133 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
134 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
135 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
136 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
137};
138
139__u32 sctp_start_cksum(__u8 *buffer, __u16 length)
140{
141 __u32 crc32 = ~(__u32) 0;
142 __u32 i;
143
144 /* Optimize this routine to be SCTP specific, knowing how
145 * to skip the checksum field of the SCTP header.
146 */
147
148 /* Calculate CRC up to the checksum. */
149 for (i = 0; i < (sizeof(struct sctphdr) - sizeof(__u32)); i++)
150 CRC32C(crc32, buffer[i]);
151
152 /* Skip checksum field of the header. */
153 for (i = 0; i < sizeof(__u32); i++)
154 CRC32C(crc32, 0);
155
156 /* Calculate the rest of the CRC. */
157 for (i = sizeof(struct sctphdr); i < length ; i++)
158 CRC32C(crc32, buffer[i]);
159
160 return crc32;
161}
162
163__u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
164{
165 __u32 i;
166
167 for (i = 0; i < length ; i++)
168 CRC32C(crc32, buffer[i]);
169
170 return crc32;
171}
172
173#if 0
174__u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32)
175{
176 __u32 i;
177 __u32 *_to = (__u32 *)to;
178 __u32 *_from = (__u32 *)from;
179
180 for (i = 0; i < (length/4); i++) {
181 _to[i] = _from[i];
182 CRC32C(crc32, from[i*4]);
183 CRC32C(crc32, from[i*4+1]);
184 CRC32C(crc32, from[i*4+2]);
185 CRC32C(crc32, from[i*4+3]);
186 }
187
188 return crc32;
189}
190#endif /* 0 */
191
192__u32 sctp_end_cksum(__u32 crc32)
193{
194 __u32 result;
195 __u8 byte0, byte1, byte2, byte3;
196
197 result = ~crc32;
198
199 /* result now holds the negated polynomial remainder;
200 * since the table and algorithm is "reflected" [williams95].
201 * That is, result has the same value as if we mapped the message
202 * to a polyomial, computed the host-bit-order polynomial
203 * remainder, performed final negation, then did an end-for-end
204 * bit-reversal.
205 * Note that a 32-bit bit-reversal is identical to four inplace
206 * 8-bit reversals followed by an end-for-end byteswap.
207 * In other words, the bytes of each bit are in the right order,
208 * but the bytes have been byteswapped. So we now do an explicit
209 * byteswap. On a little-endian machine, this byteswap and
210 * the final ntohl cancel out and could be elided.
211 */
212 byte0 = result & 0xff;
213 byte1 = (result>>8) & 0xff;
214 byte2 = (result>>16) & 0xff;
215 byte3 = (result>>24) & 0xff;
216
217 crc32 = ((byte0 << 24) |
218 (byte1 << 16) |
219 (byte2 << 8) |
220 byte3);
221 return crc32;
222}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 2d2d81ef4a69..de6f505d6ff8 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -328,24 +328,35 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
328 const union sctp_addr *paddr, 328 const union sctp_addr *paddr,
329 struct sctp_transport **transport) 329 struct sctp_transport **transport)
330{ 330{
331 struct sctp_association *asoc = NULL;
332 struct sctp_transport *t = NULL;
333 struct sctp_hashbucket *head;
334 struct sctp_ep_common *epb;
335 struct hlist_node *node;
336 int hash;
331 int rport; 337 int rport;
332 struct sctp_association *asoc;
333 struct list_head *pos;
334 338
339 *transport = NULL;
335 rport = ntohs(paddr->v4.sin_port); 340 rport = ntohs(paddr->v4.sin_port);
336 341
337 list_for_each(pos, &ep->asocs) { 342 hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport);
338 asoc = list_entry(pos, struct sctp_association, asocs); 343 head = &sctp_assoc_hashtable[hash];
339 if (rport == asoc->peer.port) { 344 read_lock(&head->lock);
340 *transport = sctp_assoc_lookup_paddr(asoc, paddr); 345 sctp_for_each_hentry(epb, node, &head->chain) {
341 346 asoc = sctp_assoc(epb);
342 if (*transport) 347 if (asoc->ep != ep || rport != asoc->peer.port)
343 return asoc; 348 goto next;
349
350 t = sctp_assoc_lookup_paddr(asoc, paddr);
351 if (t) {
352 *transport = t;
353 break;
344 } 354 }
355next:
356 asoc = NULL;
345 } 357 }
346 358 read_unlock(&head->lock);
347 *transport = NULL; 359 return asoc;
348 return NULL;
349} 360}
350 361
351/* Lookup association on an endpoint based on a peer address. BH-safe. */ 362/* Lookup association on an endpoint based on a peer address. BH-safe. */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 86503e7fa21e..d695f710fc77 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -60,6 +60,7 @@
60#include <net/xfrm.h> 60#include <net/xfrm.h>
61#include <net/sctp/sctp.h> 61#include <net/sctp/sctp.h>
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63#include <net/sctp/checksum.h>
63 64
64/* Forward declarations for internal helpers. */ 65/* Forward declarations for internal helpers. */
65static int sctp_rcv_ootb(struct sk_buff *); 66static int sctp_rcv_ootb(struct sk_buff *);
@@ -656,7 +657,6 @@ discard:
656/* Insert endpoint into the hash table. */ 657/* Insert endpoint into the hash table. */
657static void __sctp_hash_endpoint(struct sctp_endpoint *ep) 658static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
658{ 659{
659 struct sctp_ep_common **epp;
660 struct sctp_ep_common *epb; 660 struct sctp_ep_common *epb;
661 struct sctp_hashbucket *head; 661 struct sctp_hashbucket *head;
662 662
@@ -666,12 +666,7 @@ static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
666 head = &sctp_ep_hashtable[epb->hashent]; 666 head = &sctp_ep_hashtable[epb->hashent];
667 667
668 sctp_write_lock(&head->lock); 668 sctp_write_lock(&head->lock);
669 epp = &head->chain; 669 hlist_add_head(&epb->node, &head->chain);
670 epb->next = *epp;
671 if (epb->next)
672 (*epp)->pprev = &epb->next;
673 *epp = epb;
674 epb->pprev = epp;
675 sctp_write_unlock(&head->lock); 670 sctp_write_unlock(&head->lock);
676} 671}
677 672
@@ -691,19 +686,15 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
691 686
692 epb = &ep->base; 687 epb = &ep->base;
693 688
689 if (hlist_unhashed(&epb->node))
690 return;
691
694 epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); 692 epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
695 693
696 head = &sctp_ep_hashtable[epb->hashent]; 694 head = &sctp_ep_hashtable[epb->hashent];
697 695
698 sctp_write_lock(&head->lock); 696 sctp_write_lock(&head->lock);
699 697 __hlist_del(&epb->node);
700 if (epb->pprev) {
701 if (epb->next)
702 epb->next->pprev = epb->pprev;
703 *epb->pprev = epb->next;
704 epb->pprev = NULL;
705 }
706
707 sctp_write_unlock(&head->lock); 698 sctp_write_unlock(&head->lock);
708} 699}
709 700
@@ -721,12 +712,13 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
721 struct sctp_hashbucket *head; 712 struct sctp_hashbucket *head;
722 struct sctp_ep_common *epb; 713 struct sctp_ep_common *epb;
723 struct sctp_endpoint *ep; 714 struct sctp_endpoint *ep;
715 struct hlist_node *node;
724 int hash; 716 int hash;
725 717
726 hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); 718 hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port));
727 head = &sctp_ep_hashtable[hash]; 719 head = &sctp_ep_hashtable[hash];
728 read_lock(&head->lock); 720 read_lock(&head->lock);
729 for (epb = head->chain; epb; epb = epb->next) { 721 sctp_for_each_hentry(epb, node, &head->chain) {
730 ep = sctp_ep(epb); 722 ep = sctp_ep(epb);
731 if (sctp_endpoint_is_match(ep, laddr)) 723 if (sctp_endpoint_is_match(ep, laddr))
732 goto hit; 724 goto hit;
@@ -744,7 +736,6 @@ hit:
744/* Insert association into the hash table. */ 736/* Insert association into the hash table. */
745static void __sctp_hash_established(struct sctp_association *asoc) 737static void __sctp_hash_established(struct sctp_association *asoc)
746{ 738{
747 struct sctp_ep_common **epp;
748 struct sctp_ep_common *epb; 739 struct sctp_ep_common *epb;
749 struct sctp_hashbucket *head; 740 struct sctp_hashbucket *head;
750 741
@@ -756,12 +747,7 @@ static void __sctp_hash_established(struct sctp_association *asoc)
756 head = &sctp_assoc_hashtable[epb->hashent]; 747 head = &sctp_assoc_hashtable[epb->hashent];
757 748
758 sctp_write_lock(&head->lock); 749 sctp_write_lock(&head->lock);
759 epp = &head->chain; 750 hlist_add_head(&epb->node, &head->chain);
760 epb->next = *epp;
761 if (epb->next)
762 (*epp)->pprev = &epb->next;
763 *epp = epb;
764 epb->pprev = epp;
765 sctp_write_unlock(&head->lock); 751 sctp_write_unlock(&head->lock);
766} 752}
767 753
@@ -790,14 +776,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
790 head = &sctp_assoc_hashtable[epb->hashent]; 776 head = &sctp_assoc_hashtable[epb->hashent];
791 777
792 sctp_write_lock(&head->lock); 778 sctp_write_lock(&head->lock);
793 779 __hlist_del(&epb->node);
794 if (epb->pprev) {
795 if (epb->next)
796 epb->next->pprev = epb->pprev;
797 *epb->pprev = epb->next;
798 epb->pprev = NULL;
799 }
800
801 sctp_write_unlock(&head->lock); 780 sctp_write_unlock(&head->lock);
802} 781}
803 782
@@ -822,6 +801,7 @@ static struct sctp_association *__sctp_lookup_association(
822 struct sctp_ep_common *epb; 801 struct sctp_ep_common *epb;
823 struct sctp_association *asoc; 802 struct sctp_association *asoc;
824 struct sctp_transport *transport; 803 struct sctp_transport *transport;
804 struct hlist_node *node;
825 int hash; 805 int hash;
826 806
827 /* Optimize here for direct hit, only listening connections can 807 /* Optimize here for direct hit, only listening connections can
@@ -830,7 +810,7 @@ static struct sctp_association *__sctp_lookup_association(
830 hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); 810 hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port));
831 head = &sctp_assoc_hashtable[hash]; 811 head = &sctp_assoc_hashtable[hash];
832 read_lock(&head->lock); 812 read_lock(&head->lock);
833 for (epb = head->chain; epb; epb = epb->next) { 813 sctp_for_each_hentry(epb, node, &head->chain) {
834 asoc = sctp_assoc(epb); 814 asoc = sctp_assoc(epb);
835 transport = sctp_assoc_is_match(asoc, local, peer); 815 transport = sctp_assoc_is_match(asoc, local, peer);
836 if (transport) 816 if (transport)
@@ -911,14 +891,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
911 891
912 ch = (sctp_chunkhdr_t *) skb->data; 892 ch = (sctp_chunkhdr_t *) skb->data;
913 893
914 /* The code below will attempt to walk the chunk and extract
915 * parameter information. Before we do that, we need to verify
916 * that the chunk length doesn't cause overflow. Otherwise, we'll
917 * walk off the end.
918 */
919 if (WORD_ROUND(ntohs(ch->length)) > skb->len)
920 return NULL;
921
922 /* 894 /*
923 * This code will NOT touch anything inside the chunk--it is 895 * This code will NOT touch anything inside the chunk--it is
924 * strictly READ-ONLY. 896 * strictly READ-ONLY.
@@ -955,6 +927,44 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
955 return NULL; 927 return NULL;
956} 928}
957 929
930/* ADD-IP, Section 5.2
931 * When an endpoint receives an ASCONF Chunk from the remote peer
932 * special procedures may be needed to identify the association the
933 * ASCONF Chunk is associated with. To properly find the association
934 * the following procedures SHOULD be followed:
935 *
936 * D2) If the association is not found, use the address found in the
937 * Address Parameter TLV combined with the port number found in the
938 * SCTP common header. If found proceed to rule D4.
939 *
940 * D2-ext) If more than one ASCONF Chunks are packed together, use the
941 * address found in the ASCONF Address Parameter TLV of each of the
942 * subsequent ASCONF Chunks. If found, proceed to rule D4.
943 */
944static struct sctp_association *__sctp_rcv_asconf_lookup(
945 sctp_chunkhdr_t *ch,
946 const union sctp_addr *laddr,
947 __be32 peer_port,
948 struct sctp_transport **transportp)
949{
950 sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
951 struct sctp_af *af;
952 union sctp_addr_param *param;
953 union sctp_addr paddr;
954
955 /* Skip over the ADDIP header and find the Address parameter */
956 param = (union sctp_addr_param *)(asconf + 1);
957
958 af = sctp_get_af_specific(param_type2af(param->v4.param_hdr.type));
959 if (unlikely(!af))
960 return NULL;
961
962 af->from_addr_param(&paddr, param, peer_port, 0);
963
964 return __sctp_lookup_association(laddr, &paddr, transportp);
965}
966
967
958/* SCTP-AUTH, Section 6.3: 968/* SCTP-AUTH, Section 6.3:
959* If the receiver does not find a STCB for a packet containing an AUTH 969* If the receiver does not find a STCB for a packet containing an AUTH
960* chunk as the first chunk and not a COOKIE-ECHO chunk as the second 970* chunk as the first chunk and not a COOKIE-ECHO chunk as the second
@@ -963,20 +973,64 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
963* 973*
964* This means that any chunks that can help us identify the association need 974* This means that any chunks that can help us identify the association need
965* to be looked at to find this assocation. 975* to be looked at to find this assocation.
966*
967* TODO: The only chunk currently defined that can do that is ASCONF, but we
968* don't support that functionality yet.
969*/ 976*/
970static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb, 977static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
971 const union sctp_addr *paddr,
972 const union sctp_addr *laddr, 978 const union sctp_addr *laddr,
973 struct sctp_transport **transportp) 979 struct sctp_transport **transportp)
974{ 980{
975 /* XXX - walk through the chunks looking for something that can 981 struct sctp_association *asoc = NULL;
976 * help us find the association. INIT, and INIT-ACK are not permitted. 982 sctp_chunkhdr_t *ch;
977 * That leaves ASCONF, but we don't support that yet. 983 int have_auth = 0;
984 unsigned int chunk_num = 1;
985 __u8 *ch_end;
986
987 /* Walk through the chunks looking for AUTH or ASCONF chunks
988 * to help us find the association.
978 */ 989 */
979 return NULL; 990 ch = (sctp_chunkhdr_t *) skb->data;
991 do {
992 /* Break out if chunk length is less then minimal. */
993 if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
994 break;
995
996 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
997 if (ch_end > skb_tail_pointer(skb))
998 break;
999
1000 switch(ch->type) {
1001 case SCTP_CID_AUTH:
1002 have_auth = chunk_num;
1003 break;
1004
1005 case SCTP_CID_COOKIE_ECHO:
1006 /* If a packet arrives containing an AUTH chunk as
1007 * a first chunk, a COOKIE-ECHO chunk as the second
1008 * chunk, and possibly more chunks after them, and
1009 * the receiver does not have an STCB for that
1010 * packet, then authentication is based on
1011 * the contents of the COOKIE- ECHO chunk.
1012 */
1013 if (have_auth == 1 && chunk_num == 2)
1014 return NULL;
1015 break;
1016
1017 case SCTP_CID_ASCONF:
1018 if (have_auth || sctp_addip_noauth)
1019 asoc = __sctp_rcv_asconf_lookup(ch, laddr,
1020 sctp_hdr(skb)->source,
1021 transportp);
1022 default:
1023 break;
1024 }
1025
1026 if (asoc)
1027 break;
1028
1029 ch = (sctp_chunkhdr_t *) ch_end;
1030 chunk_num++;
1031 } while (ch_end < skb_tail_pointer(skb));
1032
1033 return asoc;
980} 1034}
981 1035
982/* 1036/*
@@ -986,7 +1040,6 @@ static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb,
986 * chunks. 1040 * chunks.
987 */ 1041 */
988static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, 1042static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
989 const union sctp_addr *paddr,
990 const union sctp_addr *laddr, 1043 const union sctp_addr *laddr,
991 struct sctp_transport **transportp) 1044 struct sctp_transport **transportp)
992{ 1045{
@@ -994,6 +1047,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
994 1047
995 ch = (sctp_chunkhdr_t *) skb->data; 1048 ch = (sctp_chunkhdr_t *) skb->data;
996 1049
1050 /* The code below will attempt to walk the chunk and extract
1051 * parameter information. Before we do that, we need to verify
1052 * that the chunk length doesn't cause overflow. Otherwise, we'll
1053 * walk off the end.
1054 */
1055 if (WORD_ROUND(ntohs(ch->length)) > skb->len)
1056 return NULL;
1057
997 /* If this is INIT/INIT-ACK look inside the chunk too. */ 1058 /* If this is INIT/INIT-ACK look inside the chunk too. */
998 switch (ch->type) { 1059 switch (ch->type) {
999 case SCTP_CID_INIT: 1060 case SCTP_CID_INIT:
@@ -1001,11 +1062,12 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
1001 return __sctp_rcv_init_lookup(skb, laddr, transportp); 1062 return __sctp_rcv_init_lookup(skb, laddr, transportp);
1002 break; 1063 break;
1003 1064
1004 case SCTP_CID_AUTH: 1065 default:
1005 return __sctp_rcv_auth_lookup(skb, paddr, laddr, transportp); 1066 return __sctp_rcv_walk_lookup(skb, laddr, transportp);
1006 break; 1067 break;
1007 } 1068 }
1008 1069
1070
1009 return NULL; 1071 return NULL;
1010} 1072}
1011 1073
@@ -1024,7 +1086,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
1024 * parameters within the INIT or INIT-ACK. 1086 * parameters within the INIT or INIT-ACK.
1025 */ 1087 */
1026 if (!asoc) 1088 if (!asoc)
1027 asoc = __sctp_rcv_lookup_harder(skb, paddr, laddr, transportp); 1089 asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
1028 1090
1029 return asoc; 1091 return asoc;
1030} 1092}
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index f10fe7fbf24c..cf4b7eb023b3 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -90,6 +90,10 @@ void sctp_inq_free(struct sctp_inq *queue)
90void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) 90void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
91{ 91{
92 /* Directly call the packet handling routine. */ 92 /* Directly call the packet handling routine. */
93 if (chunk->rcvr->dead) {
94 sctp_chunk_free(chunk);
95 return;
96 }
93 97
94 /* We are now calling this either from the soft interrupt 98 /* We are now calling this either from the soft interrupt
95 * or from the backlog processing. 99 * or from the backlog processing.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index eb4deaf58914..74f106a7a7e9 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -330,7 +330,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
330 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 330 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
331 if (!laddr->valid) 331 if (!laddr->valid)
332 continue; 332 continue;
333 if ((laddr->use_as_src) && 333 if ((laddr->state == SCTP_ADDR_SRC) &&
334 (laddr->a.sa.sa_family == AF_INET6) && 334 (laddr->a.sa.sa_family == AF_INET6) &&
335 (scope <= sctp_scope(&laddr->a))) { 335 (scope <= sctp_scope(&laddr->a))) {
336 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); 336 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
@@ -556,7 +556,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
556 if (!(type & IPV6_ADDR_UNICAST)) 556 if (!(type & IPV6_ADDR_UNICAST))
557 return 0; 557 return 0;
558 558
559 return ipv6_chk_addr(in6, NULL, 0); 559 return ipv6_chk_addr(&init_net, in6, NULL, 0);
560} 560}
561 561
562/* This function checks if the address is a valid address to be used for 562/* This function checks if the address is a valid address to be used for
@@ -631,7 +631,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
631 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 631 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
632 struct sctp6_sock *newsctp6sk; 632 struct sctp6_sock *newsctp6sk;
633 633
634 newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); 634 newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot);
635 if (!newsk) 635 if (!newsk)
636 goto out; 636 goto out;
637 637
@@ -858,7 +858,8 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
858 dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); 858 dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
859 if (!dev) 859 if (!dev)
860 return 0; 860 return 0;
861 if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { 861 if (!ipv6_chk_addr(&init_net, &addr->v6.sin6_addr,
862 dev, 0)) {
862 dev_put(dev); 863 dev_put(dev);
863 return 0; 864 return 0;
864 } 865 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 847639d542c0..5e811b91f21c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -60,6 +60,7 @@
60 60
61#include <net/sctp/sctp.h> 61#include <net/sctp/sctp.h>
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63#include <net/sctp/checksum.h>
63 64
64/* Forward declarations for private helpers. */ 65/* Forward declarations for private helpers. */
65static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, 66static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 28f4fe77ceee..a42af865c2ef 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -382,7 +382,7 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new)
382/* Mark all the eligible packets on a transport for retransmission. */ 382/* Mark all the eligible packets on a transport for retransmission. */
383void sctp_retransmit_mark(struct sctp_outq *q, 383void sctp_retransmit_mark(struct sctp_outq *q,
384 struct sctp_transport *transport, 384 struct sctp_transport *transport,
385 __u8 fast_retransmit) 385 __u8 reason)
386{ 386{
387 struct list_head *lchunk, *ltemp; 387 struct list_head *lchunk, *ltemp;
388 struct sctp_chunk *chunk; 388 struct sctp_chunk *chunk;
@@ -412,20 +412,20 @@ void sctp_retransmit_mark(struct sctp_outq *q,
412 continue; 412 continue;
413 } 413 }
414 414
415 /* If we are doing retransmission due to a fast retransmit, 415 /* If we are doing retransmission due to a timeout or pmtu
416 * only the chunk's that are marked for fast retransmit 416 * discovery, only the chunks that are not yet acked should
417 * should be added to the retransmit queue. If we are doing 417 * be added to the retransmit queue.
418 * retransmission due to a timeout or pmtu discovery, only the
419 * chunks that are not yet acked should be added to the
420 * retransmit queue.
421 */ 418 */
422 if ((fast_retransmit && (chunk->fast_retransmit > 0)) || 419 if ((reason == SCTP_RTXR_FAST_RTX &&
423 (!fast_retransmit && !chunk->tsn_gap_acked)) { 420 (chunk->fast_retransmit > 0)) ||
421 (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) {
424 /* If this chunk was sent less then 1 rto ago, do not 422 /* If this chunk was sent less then 1 rto ago, do not
425 * retransmit this chunk, but give the peer time 423 * retransmit this chunk, but give the peer time
426 * to acknowlege it. 424 * to acknowlege it. Do this only when
425 * retransmitting due to T3 timeout.
427 */ 426 */
428 if ((jiffies - chunk->sent_at) < transport->rto) 427 if (reason == SCTP_RTXR_T3_RTX &&
428 (jiffies - chunk->sent_at) < transport->last_rto)
429 continue; 429 continue;
430 430
431 /* RFC 2960 6.2.1 Processing a Received SACK 431 /* RFC 2960 6.2.1 Processing a Received SACK
@@ -467,10 +467,10 @@ void sctp_retransmit_mark(struct sctp_outq *q,
467 } 467 }
468 } 468 }
469 469
470 SCTP_DEBUG_PRINTK("%s: transport: %p, fast_retransmit: %d, " 470 SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, "
471 "cwnd: %d, ssthresh: %d, flight_size: %d, " 471 "cwnd: %d, ssthresh: %d, flight_size: %d, "
472 "pba: %d\n", __FUNCTION__, 472 "pba: %d\n", __FUNCTION__,
473 transport, fast_retransmit, 473 transport, reason,
474 transport->cwnd, transport->ssthresh, 474 transport->cwnd, transport->ssthresh,
475 transport->flight_size, 475 transport->flight_size,
476 transport->partial_bytes_acked); 476 transport->partial_bytes_acked);
@@ -484,7 +484,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
484 sctp_retransmit_reason_t reason) 484 sctp_retransmit_reason_t reason)
485{ 485{
486 int error = 0; 486 int error = 0;
487 __u8 fast_retransmit = 0;
488 487
489 switch(reason) { 488 switch(reason) {
490 case SCTP_RTXR_T3_RTX: 489 case SCTP_RTXR_T3_RTX:
@@ -499,16 +498,18 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
499 case SCTP_RTXR_FAST_RTX: 498 case SCTP_RTXR_FAST_RTX:
500 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); 499 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
501 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); 500 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
502 fast_retransmit = 1;
503 break; 501 break;
504 case SCTP_RTXR_PMTUD: 502 case SCTP_RTXR_PMTUD:
505 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); 503 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
506 break; 504 break;
505 case SCTP_RTXR_T1_RTX:
506 SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS);
507 break;
507 default: 508 default:
508 BUG(); 509 BUG();
509 } 510 }
510 511
511 sctp_retransmit_mark(q, transport, fast_retransmit); 512 sctp_retransmit_mark(q, transport, reason);
512 513
513 /* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination, 514 /* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination,
514 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by 515 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
@@ -641,7 +642,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
641 642
642 /* If we are here due to a retransmit timeout or a fast 643 /* If we are here due to a retransmit timeout or a fast
643 * retransmit and if there are any chunks left in the retransmit 644 * retransmit and if there are any chunks left in the retransmit
644 * queue that could not fit in the PMTU sized packet, they need * to be marked as ineligible for a subsequent fast retransmit. 645 * queue that could not fit in the PMTU sized packet, they need
646 * to be marked as ineligible for a subsequent fast retransmit.
645 */ 647 */
646 if (rtx_timeout && !lchunk) { 648 if (rtx_timeout && !lchunk) {
647 list_for_each(lchunk1, lqueue) { 649 list_for_each(lchunk1, lqueue) {
@@ -660,10 +662,9 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
660int sctp_outq_uncork(struct sctp_outq *q) 662int sctp_outq_uncork(struct sctp_outq *q)
661{ 663{
662 int error = 0; 664 int error = 0;
663 if (q->cork) { 665 if (q->cork)
664 q->cork = 0; 666 q->cork = 0;
665 error = sctp_outq_flush(q, 0); 667 error = sctp_outq_flush(q, 0);
666 }
667 return error; 668 return error;
668} 669}
669 670
@@ -715,7 +716,29 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
715 new_transport = chunk->transport; 716 new_transport = chunk->transport;
716 717
717 if (!new_transport) { 718 if (!new_transport) {
718 new_transport = asoc->peer.active_path; 719 /*
720 * If we have a prior transport pointer, see if
721 * the destination address of the chunk
722 * matches the destination address of the
723 * current transport. If not a match, then
724 * try to look up the transport with a given
725 * destination address. We do this because
726 * after processing ASCONFs, we may have new
727 * transports created.
728 */
729 if (transport &&
730 sctp_cmp_addr_exact(&chunk->dest,
731 &transport->ipaddr))
732 new_transport = transport;
733 else
734 new_transport = sctp_assoc_lookup_paddr(asoc,
735 &chunk->dest);
736
737 /* if we still don't have a new transport, then
738 * use the current active path.
739 */
740 if (!new_transport)
741 new_transport = asoc->peer.active_path;
719 } else if ((new_transport->state == SCTP_INACTIVE) || 742 } else if ((new_transport->state == SCTP_INACTIVE) ||
720 (new_transport->state == SCTP_UNCONFIRMED)) { 743 (new_transport->state == SCTP_UNCONFIRMED)) {
721 /* If the chunk is Heartbeat or Heartbeat Ack, 744 /* If the chunk is Heartbeat or Heartbeat Ack,
@@ -728,9 +751,12 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
728 * address of the IP datagram containing the 751 * address of the IP datagram containing the
729 * HEARTBEAT chunk to which this ack is responding. 752 * HEARTBEAT chunk to which this ack is responding.
730 * ... 753 * ...
754 *
755 * ASCONF_ACKs also must be sent to the source.
731 */ 756 */
732 if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT && 757 if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
733 chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK) 758 chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK &&
759 chunk->chunk_hdr->type != SCTP_CID_ASCONF_ACK)
734 new_transport = asoc->peer.active_path; 760 new_transport = asoc->peer.active_path;
735 } 761 }
736 762
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index e4cd841a22e4..249973204070 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -225,6 +225,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
225 struct sctp_ep_common *epb; 225 struct sctp_ep_common *epb;
226 struct sctp_endpoint *ep; 226 struct sctp_endpoint *ep;
227 struct sock *sk; 227 struct sock *sk;
228 struct hlist_node *node;
228 int hash = *(loff_t *)v; 229 int hash = *(loff_t *)v;
229 230
230 if (hash >= sctp_ep_hashsize) 231 if (hash >= sctp_ep_hashsize)
@@ -233,7 +234,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
233 head = &sctp_ep_hashtable[hash]; 234 head = &sctp_ep_hashtable[hash];
234 sctp_local_bh_disable(); 235 sctp_local_bh_disable();
235 read_lock(&head->lock); 236 read_lock(&head->lock);
236 for (epb = head->chain; epb; epb = epb->next) { 237 sctp_for_each_hentry(epb, node, &head->chain) {
237 ep = sctp_ep(epb); 238 ep = sctp_ep(epb);
238 sk = epb->sk; 239 sk = epb->sk;
239 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, 240 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
@@ -328,6 +329,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
328 struct sctp_ep_common *epb; 329 struct sctp_ep_common *epb;
329 struct sctp_association *assoc; 330 struct sctp_association *assoc;
330 struct sock *sk; 331 struct sock *sk;
332 struct hlist_node *node;
331 int hash = *(loff_t *)v; 333 int hash = *(loff_t *)v;
332 334
333 if (hash >= sctp_assoc_hashsize) 335 if (hash >= sctp_assoc_hashsize)
@@ -336,7 +338,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
336 head = &sctp_assoc_hashtable[hash]; 338 head = &sctp_assoc_hashtable[hash];
337 sctp_local_bh_disable(); 339 sctp_local_bh_disable();
338 read_lock(&head->lock); 340 read_lock(&head->lock);
339 for (epb = head->chain; epb; epb = epb->next) { 341 sctp_for_each_hentry(epb, node, &head->chain) {
340 assoc = sctp_assoc(epb); 342 assoc = sctp_assoc(epb);
341 sk = epb->sk; 343 sk = epb->sk;
342 seq_printf(seq, 344 seq_printf(seq,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f5cd96f5fe74..1339742e49f1 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -229,8 +229,8 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
229 (((AF_INET6 == addr->a.sa.sa_family) && 229 (((AF_INET6 == addr->a.sa.sa_family) &&
230 (copy_flags & SCTP_ADDR6_ALLOWED) && 230 (copy_flags & SCTP_ADDR6_ALLOWED) &&
231 (copy_flags & SCTP_ADDR6_PEERSUPP)))) { 231 (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
232 error = sctp_add_bind_addr(bp, &addr->a, 1, 232 error = sctp_add_bind_addr(bp, &addr->a,
233 GFP_ATOMIC); 233 SCTP_ADDR_SRC, GFP_ATOMIC);
234 if (error) 234 if (error)
235 goto end_copy; 235 goto end_copy;
236 } 236 }
@@ -359,7 +359,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
359 const struct sk_buff *skb) 359 const struct sk_buff *skb)
360{ 360{
361 /* Is this a non-unicast address or a unusable SCTP address? */ 361 /* Is this a non-unicast address or a unusable SCTP address? */
362 if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) 362 if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr))
363 return 0; 363 return 0;
364 364
365 /* Is this a broadcast address? */ 365 /* Is this a broadcast address? */
@@ -372,7 +372,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
372/* Should this be available for binding? */ 372/* Should this be available for binding? */
373static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) 373static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
374{ 374{
375 int ret = inet_addr_type(addr->v4.sin_addr.s_addr); 375 int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
376 376
377 377
378 if (addr->v4.sin_addr.s_addr != INADDR_ANY && 378 if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
@@ -408,13 +408,15 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
408 */ 408 */
409 409
410 /* Check for unusable SCTP addresses. */ 410 /* Check for unusable SCTP addresses. */
411 if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) { 411 if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
412 retval = SCTP_SCOPE_UNUSABLE; 412 retval = SCTP_SCOPE_UNUSABLE;
413 } else if (LOOPBACK(addr->v4.sin_addr.s_addr)) { 413 } else if (ipv4_is_loopback(addr->v4.sin_addr.s_addr)) {
414 retval = SCTP_SCOPE_LOOPBACK; 414 retval = SCTP_SCOPE_LOOPBACK;
415 } else if (IS_IPV4_LINK_ADDRESS(&addr->v4.sin_addr.s_addr)) { 415 } else if (ipv4_is_linklocal_169(addr->v4.sin_addr.s_addr)) {
416 retval = SCTP_SCOPE_LINK; 416 retval = SCTP_SCOPE_LINK;
417 } else if (IS_IPV4_PRIVATE_ADDRESS(&addr->v4.sin_addr.s_addr)) { 417 } else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
418 ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
419 ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
418 retval = SCTP_SCOPE_PRIVATE; 420 retval = SCTP_SCOPE_PRIVATE;
419 } else { 421 } else {
420 retval = SCTP_SCOPE_GLOBAL; 422 retval = SCTP_SCOPE_GLOBAL;
@@ -452,7 +454,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
452 __FUNCTION__, NIPQUAD(fl.fl4_dst), 454 __FUNCTION__, NIPQUAD(fl.fl4_dst),
453 NIPQUAD(fl.fl4_src)); 455 NIPQUAD(fl.fl4_src));
454 456
455 if (!ip_route_output_key(&rt, &fl)) { 457 if (!ip_route_output_key(&init_net, &rt, &fl)) {
456 dst = &rt->u.dst; 458 dst = &rt->u.dst;
457 } 459 }
458 460
@@ -470,7 +472,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
470 */ 472 */
471 rcu_read_lock(); 473 rcu_read_lock();
472 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 474 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
473 if (!laddr->valid || !laddr->use_as_src) 475 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
474 continue; 476 continue;
475 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); 477 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
476 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) 478 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
@@ -492,10 +494,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
492 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 494 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
493 if (!laddr->valid) 495 if (!laddr->valid)
494 continue; 496 continue;
495 if ((laddr->use_as_src) && 497 if ((laddr->state == SCTP_ADDR_SRC) &&
496 (AF_INET == laddr->a.sa.sa_family)) { 498 (AF_INET == laddr->a.sa.sa_family)) {
497 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 499 fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
498 if (!ip_route_output_key(&rt, &fl)) { 500 if (!ip_route_output_key(&init_net, &rt, &fl)) {
499 dst = &rt->u.dst; 501 dst = &rt->u.dst;
500 goto out_unlock; 502 goto out_unlock;
501 } 503 }
@@ -552,7 +554,8 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
552{ 554{
553 struct inet_sock *inet = inet_sk(sk); 555 struct inet_sock *inet = inet_sk(sk);
554 struct inet_sock *newinet; 556 struct inet_sock *newinet;
555 struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); 557 struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL,
558 sk->sk_prot);
556 559
557 if (!newsk) 560 if (!newsk)
558 goto out; 561 goto out;
@@ -1106,7 +1109,7 @@ SCTP_STATIC __init int sctp_init(void)
1106 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); 1109 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
1107 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); 1110 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1108 1111
1109 sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; 1112 sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
1110 sysctl_sctp_wmem[1] = 16*1024; 1113 sysctl_sctp_wmem[1] = 16*1024;
1111 sysctl_sctp_wmem[2] = max(64*1024, max_share); 1114 sysctl_sctp_wmem[2] = max(64*1024, max_share);
1112 1115
@@ -1136,7 +1139,7 @@ SCTP_STATIC __init int sctp_init(void)
1136 } 1139 }
1137 for (i = 0; i < sctp_assoc_hashsize; i++) { 1140 for (i = 0; i < sctp_assoc_hashsize; i++) {
1138 rwlock_init(&sctp_assoc_hashtable[i].lock); 1141 rwlock_init(&sctp_assoc_hashtable[i].lock);
1139 sctp_assoc_hashtable[i].chain = NULL; 1142 INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain);
1140 } 1143 }
1141 1144
1142 /* Allocate and initialize the endpoint hash table. */ 1145 /* Allocate and initialize the endpoint hash table. */
@@ -1150,7 +1153,7 @@ SCTP_STATIC __init int sctp_init(void)
1150 } 1153 }
1151 for (i = 0; i < sctp_ep_hashsize; i++) { 1154 for (i = 0; i < sctp_ep_hashsize; i++) {
1152 rwlock_init(&sctp_ep_hashtable[i].lock); 1155 rwlock_init(&sctp_ep_hashtable[i].lock);
1153 sctp_ep_hashtable[i].chain = NULL; 1156 INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
1154 } 1157 }
1155 1158
1156 /* Allocate and initialize the SCTP port hash table. */ 1159 /* Allocate and initialize the SCTP port hash table. */
@@ -1169,7 +1172,7 @@ SCTP_STATIC __init int sctp_init(void)
1169 } 1172 }
1170 for (i = 0; i < sctp_port_hashsize; i++) { 1173 for (i = 0; i < sctp_port_hashsize; i++) {
1171 spin_lock_init(&sctp_port_hashtable[i].lock); 1174 spin_lock_init(&sctp_port_hashtable[i].lock);
1172 sctp_port_hashtable[i].chain = NULL; 1175 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1173 } 1176 }
1174 1177
1175 printk(KERN_INFO "SCTP: Hash tables configured " 1178 printk(KERN_INFO "SCTP: Hash tables configured "
@@ -1178,6 +1181,7 @@ SCTP_STATIC __init int sctp_init(void)
1178 1181
1179 /* Disable ADDIP by default. */ 1182 /* Disable ADDIP by default. */
1180 sctp_addip_enable = 0; 1183 sctp_addip_enable = 0;
1184 sctp_addip_noauth = 0;
1181 1185
1182 /* Enable PR-SCTP by default. */ 1186 /* Enable PR-SCTP by default. */
1183 sctp_prsctp_enable = 1; 1187 sctp_prsctp_enable = 1;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index c377e4e8f653..77383e9b3988 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -77,6 +77,8 @@ static int sctp_process_param(struct sctp_association *asoc,
77 union sctp_params param, 77 union sctp_params param,
78 const union sctp_addr *peer_addr, 78 const union sctp_addr *peer_addr,
79 gfp_t gfp); 79 gfp_t gfp);
80static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
81 const void *data);
80 82
81/* What was the inbound interface for this chunk? */ 83/* What was the inbound interface for this chunk? */
82int sctp_chunk_iif(const struct sctp_chunk *chunk) 84int sctp_chunk_iif(const struct sctp_chunk *chunk)
@@ -207,11 +209,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
207 209
208 chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); 210 chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types);
209 chunksize += sizeof(ecap_param); 211 chunksize += sizeof(ecap_param);
210 if (sctp_prsctp_enable) { 212
213 if (sctp_prsctp_enable)
211 chunksize += sizeof(prsctp_param); 214 chunksize += sizeof(prsctp_param);
212 extensions[num_ext] = SCTP_CID_FWD_TSN; 215
213 num_ext += 1;
214 }
215 /* ADDIP: Section 4.2.7: 216 /* ADDIP: Section 4.2.7:
216 * An implementation supporting this extension [ADDIP] MUST list 217 * An implementation supporting this extension [ADDIP] MUST list
217 * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and 218 * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and
@@ -243,7 +244,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
243 if (auth_chunks->length) 244 if (auth_chunks->length)
244 chunksize += ntohs(auth_chunks->length); 245 chunksize += ntohs(auth_chunks->length);
245 else 246 else
246 auth_hmacs = NULL; 247 auth_chunks = NULL;
247 248
248 extensions[num_ext] = SCTP_CID_AUTH; 249 extensions[num_ext] = SCTP_CID_AUTH;
249 num_ext += 1; 250 num_ext += 1;
@@ -288,7 +289,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
288 289
289 sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); 290 sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
290 291
291 /* Add the supported extensions paramter. Be nice and add this 292 /* Add the supported extensions parameter. Be nice and add this
292 * fist before addiding the parameters for the extensions themselves 293 * fist before addiding the parameters for the extensions themselves
293 */ 294 */
294 if (num_ext) { 295 if (num_ext) {
@@ -297,7 +298,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
297 htons(sizeof(sctp_supported_ext_param_t) + num_ext); 298 htons(sizeof(sctp_supported_ext_param_t) + num_ext);
298 sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), 299 sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
299 &ext_param); 300 &ext_param);
300 sctp_addto_chunk(retval, num_ext, extensions); 301 sctp_addto_param(retval, num_ext, extensions);
301 } 302 }
302 303
303 if (sctp_prsctp_enable) 304 if (sctp_prsctp_enable)
@@ -371,12 +372,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
371 if (asoc->peer.ecn_capable) 372 if (asoc->peer.ecn_capable)
372 chunksize += sizeof(ecap_param); 373 chunksize += sizeof(ecap_param);
373 374
374 /* Tell peer that we'll do PR-SCTP only if peer advertised. */ 375 if (sctp_prsctp_enable)
375 if (asoc->peer.prsctp_capable) {
376 chunksize += sizeof(prsctp_param); 376 chunksize += sizeof(prsctp_param);
377 extensions[num_ext] = SCTP_CID_FWD_TSN;
378 num_ext += 1;
379 }
380 377
381 if (sctp_addip_enable) { 378 if (sctp_addip_enable) {
382 extensions[num_ext] = SCTP_CID_ASCONF; 379 extensions[num_ext] = SCTP_CID_ASCONF;
@@ -384,7 +381,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
384 num_ext += 2; 381 num_ext += 2;
385 } 382 }
386 383
387 chunksize += sizeof(ext_param) + num_ext;
388 chunksize += sizeof(aiparam); 384 chunksize += sizeof(aiparam);
389 385
390 if (asoc->peer.auth_capable) { 386 if (asoc->peer.auth_capable) {
@@ -407,6 +403,9 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
407 num_ext += 1; 403 num_ext += 1;
408 } 404 }
409 405
406 if (num_ext)
407 chunksize += sizeof(sctp_supported_ext_param_t) + num_ext;
408
410 /* Now allocate and fill out the chunk. */ 409 /* Now allocate and fill out the chunk. */
411 retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); 410 retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
412 if (!retval) 411 if (!retval)
@@ -428,7 +427,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
428 htons(sizeof(sctp_supported_ext_param_t) + num_ext); 427 htons(sizeof(sctp_supported_ext_param_t) + num_ext);
429 sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), 428 sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
430 &ext_param); 429 &ext_param);
431 sctp_addto_chunk(retval, num_ext, extensions); 430 sctp_addto_param(retval, num_ext, extensions);
432 } 431 }
433 if (asoc->peer.prsctp_capable) 432 if (asoc->peer.prsctp_capable)
434 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); 433 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
@@ -1276,6 +1275,9 @@ nodata:
1276/* Release the memory occupied by a chunk. */ 1275/* Release the memory occupied by a chunk. */
1277static void sctp_chunk_destroy(struct sctp_chunk *chunk) 1276static void sctp_chunk_destroy(struct sctp_chunk *chunk)
1278{ 1277{
1278 BUG_ON(!list_empty(&chunk->list));
1279 list_del_init(&chunk->transmitted_list);
1280
1279 /* Free the chunk skb data and the SCTP_chunk stub itself. */ 1281 /* Free the chunk skb data and the SCTP_chunk stub itself. */
1280 dev_kfree_skb(chunk->skb); 1282 dev_kfree_skb(chunk->skb);
1281 1283
@@ -1286,9 +1288,6 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
1286/* Possibly, free the chunk. */ 1288/* Possibly, free the chunk. */
1287void sctp_chunk_free(struct sctp_chunk *chunk) 1289void sctp_chunk_free(struct sctp_chunk *chunk)
1288{ 1290{
1289 BUG_ON(!list_empty(&chunk->list));
1290 list_del_init(&chunk->transmitted_list);
1291
1292 /* Release our reference on the message tracker. */ 1291 /* Release our reference on the message tracker. */
1293 if (chunk->msg) 1292 if (chunk->msg)
1294 sctp_datamsg_put(chunk->msg); 1293 sctp_datamsg_put(chunk->msg);
@@ -1693,8 +1692,8 @@ no_hmac:
1693 1692
1694 /* Also, add the destination address. */ 1693 /* Also, add the destination address. */
1695 if (list_empty(&retval->base.bind_addr.address_list)) { 1694 if (list_empty(&retval->base.bind_addr.address_list)) {
1696 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, 1695 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
1697 GFP_ATOMIC); 1696 SCTP_ADDR_SRC, GFP_ATOMIC);
1698 } 1697 }
1699 1698
1700 retval->next_tsn = retval->c.initial_tsn; 1699 retval->next_tsn = retval->c.initial_tsn;
@@ -1788,9 +1787,14 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
1788 sizeof(sctp_paramhdr_t); 1787 sizeof(sctp_paramhdr_t);
1789 1788
1790 1789
1790 /* This is a fatal error. Any accumulated non-fatal errors are
1791 * not reported.
1792 */
1793 if (*errp)
1794 sctp_chunk_free(*errp);
1795
1791 /* Create an error chunk and fill it in with our payload. */ 1796 /* Create an error chunk and fill it in with our payload. */
1792 if (!*errp) 1797 *errp = sctp_make_op_error_space(asoc, chunk, payload_len);
1793 *errp = sctp_make_op_error_space(asoc, chunk, payload_len);
1794 1798
1795 if (*errp) { 1799 if (*errp) {
1796 sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, 1800 sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION,
@@ -1813,9 +1817,15 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
1813{ 1817{
1814 __u16 len = ntohs(param.p->length); 1818 __u16 len = ntohs(param.p->length);
1815 1819
1816 /* Make an ERROR chunk. */ 1820 /* Processing of the HOST_NAME parameter will generate an
1817 if (!*errp) 1821 * ABORT. If we've accumulated any non-fatal errors, they
1818 *errp = sctp_make_op_error_space(asoc, chunk, len); 1822 * would be unrecognized parameters and we should not include
1823 * them in the ABORT.
1824 */
1825 if (*errp)
1826 sctp_chunk_free(*errp);
1827
1828 *errp = sctp_make_op_error_space(asoc, chunk, len);
1819 1829
1820 if (*errp) { 1830 if (*errp) {
1821 sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len); 1831 sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len);
@@ -1826,6 +1836,39 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
1826 return 0; 1836 return 0;
1827} 1837}
1828 1838
1839static int sctp_verify_ext_param(union sctp_params param)
1840{
1841 __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
1842 int have_auth = 0;
1843 int have_asconf = 0;
1844 int i;
1845
1846 for (i = 0; i < num_ext; i++) {
1847 switch (param.ext->chunks[i]) {
1848 case SCTP_CID_AUTH:
1849 have_auth = 1;
1850 break;
1851 case SCTP_CID_ASCONF:
1852 case SCTP_CID_ASCONF_ACK:
1853 have_asconf = 1;
1854 break;
1855 }
1856 }
1857
1858 /* ADD-IP Security: The draft requires us to ABORT or ignore the
1859 * INIT/INIT-ACK if ADD-IP is listed, but AUTH is not. Do this
1860 * only if ADD-IP is turned on and we are not backward-compatible
1861 * mode.
1862 */
1863 if (sctp_addip_noauth)
1864 return 1;
1865
1866 if (sctp_addip_enable && !have_auth && have_asconf)
1867 return 0;
1868
1869 return 1;
1870}
1871
1829static void sctp_process_ext_param(struct sctp_association *asoc, 1872static void sctp_process_ext_param(struct sctp_association *asoc,
1830 union sctp_params param) 1873 union sctp_params param)
1831{ 1874{
@@ -1847,7 +1890,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
1847 break; 1890 break;
1848 case SCTP_CID_ASCONF: 1891 case SCTP_CID_ASCONF:
1849 case SCTP_CID_ASCONF_ACK: 1892 case SCTP_CID_ASCONF_ACK:
1850 asoc->peer.addip_capable = 1; 1893 asoc->peer.asconf_capable = 1;
1851 break; 1894 break;
1852 default: 1895 default:
1853 break; 1896 break;
@@ -1862,56 +1905,40 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
1862 * taken if the processing endpoint does not recognize the 1905 * taken if the processing endpoint does not recognize the
1863 * Parameter Type. 1906 * Parameter Type.
1864 * 1907 *
1865 * 00 - Stop processing this SCTP chunk and discard it, 1908 * 00 - Stop processing this parameter; do not process any further
1866 * do not process any further chunks within it. 1909 * parameters within this chunk
1867 * 1910 *
1868 * 01 - Stop processing this SCTP chunk and discard it, 1911 * 01 - Stop processing this parameter, do not process any further
1869 * do not process any further chunks within it, and report 1912 * parameters within this chunk, and report the unrecognized
1870 * the unrecognized parameter in an 'Unrecognized 1913 * parameter in an 'Unrecognized Parameter' ERROR chunk.
1871 * Parameter Type' (in either an ERROR or in the INIT ACK).
1872 * 1914 *
1873 * 10 - Skip this parameter and continue processing. 1915 * 10 - Skip this parameter and continue processing.
1874 * 1916 *
1875 * 11 - Skip this parameter and continue processing but 1917 * 11 - Skip this parameter and continue processing but
1876 * report the unrecognized parameter in an 1918 * report the unrecognized parameter in an
1877 * 'Unrecognized Parameter Type' (in either an ERROR or in 1919 * 'Unrecognized Parameter' ERROR chunk.
1878 * the INIT ACK).
1879 * 1920 *
1880 * Return value: 1921 * Return value:
1881 * 0 - discard the chunk 1922 * SCTP_IERROR_NO_ERROR - continue with the chunk
1882 * 1 - continue with the chunk 1923 * SCTP_IERROR_ERROR - stop and report an error.
1924 * SCTP_IERROR_NOMEME - out of memory.
1883 */ 1925 */
1884static int sctp_process_unk_param(const struct sctp_association *asoc, 1926static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
1885 union sctp_params param, 1927 union sctp_params param,
1886 struct sctp_chunk *chunk, 1928 struct sctp_chunk *chunk,
1887 struct sctp_chunk **errp) 1929 struct sctp_chunk **errp)
1888{ 1930{
1889 int retval = 1; 1931 int retval = SCTP_IERROR_NO_ERROR;
1890 1932
1891 switch (param.p->type & SCTP_PARAM_ACTION_MASK) { 1933 switch (param.p->type & SCTP_PARAM_ACTION_MASK) {
1892 case SCTP_PARAM_ACTION_DISCARD: 1934 case SCTP_PARAM_ACTION_DISCARD:
1893 retval = 0; 1935 retval = SCTP_IERROR_ERROR;
1894 break;
1895 case SCTP_PARAM_ACTION_DISCARD_ERR:
1896 retval = 0;
1897 /* Make an ERROR chunk, preparing enough room for
1898 * returning multiple unknown parameters.
1899 */
1900 if (NULL == *errp)
1901 *errp = sctp_make_op_error_space(asoc, chunk,
1902 ntohs(chunk->chunk_hdr->length));
1903
1904 if (*errp) {
1905 sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
1906 WORD_ROUND(ntohs(param.p->length)));
1907 sctp_addto_chunk(*errp,
1908 WORD_ROUND(ntohs(param.p->length)),
1909 param.v);
1910 }
1911
1912 break; 1936 break;
1913 case SCTP_PARAM_ACTION_SKIP: 1937 case SCTP_PARAM_ACTION_SKIP:
1914 break; 1938 break;
1939 case SCTP_PARAM_ACTION_DISCARD_ERR:
1940 retval = SCTP_IERROR_ERROR;
1941 /* Fall through */
1915 case SCTP_PARAM_ACTION_SKIP_ERR: 1942 case SCTP_PARAM_ACTION_SKIP_ERR:
1916 /* Make an ERROR chunk, preparing enough room for 1943 /* Make an ERROR chunk, preparing enough room for
1917 * returning multiple unknown parameters. 1944 * returning multiple unknown parameters.
@@ -1932,9 +1959,8 @@ static int sctp_process_unk_param(const struct sctp_association *asoc,
1932 * to the peer and the association won't be 1959 * to the peer and the association won't be
1933 * established. 1960 * established.
1934 */ 1961 */
1935 retval = 0; 1962 retval = SCTP_IERROR_NOMEM;
1936 } 1963 }
1937
1938 break; 1964 break;
1939 default: 1965 default:
1940 break; 1966 break;
@@ -1943,18 +1969,20 @@ static int sctp_process_unk_param(const struct sctp_association *asoc,
1943 return retval; 1969 return retval;
1944} 1970}
1945 1971
1946/* Find unrecognized parameters in the chunk. 1972/* Verify variable length parameters
1947 * Return values: 1973 * Return values:
1948 * 0 - discard the chunk 1974 * SCTP_IERROR_ABORT - trigger an ABORT
1949 * 1 - continue with the chunk 1975 * SCTP_IERROR_NOMEM - out of memory (abort)
1976 * SCTP_IERROR_ERROR - stop processing, trigger an ERROR
1977 * SCTP_IERROR_NO_ERROR - continue with the chunk
1950 */ 1978 */
1951static int sctp_verify_param(const struct sctp_association *asoc, 1979static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
1952 union sctp_params param, 1980 union sctp_params param,
1953 sctp_cid_t cid, 1981 sctp_cid_t cid,
1954 struct sctp_chunk *chunk, 1982 struct sctp_chunk *chunk,
1955 struct sctp_chunk **err_chunk) 1983 struct sctp_chunk **err_chunk)
1956{ 1984{
1957 int retval = 1; 1985 int retval = SCTP_IERROR_NO_ERROR;
1958 1986
1959 /* FIXME - This routine is not looking at each parameter per the 1987 /* FIXME - This routine is not looking at each parameter per the
1960 * chunk type, i.e., unrecognized parameters should be further 1988 * chunk type, i.e., unrecognized parameters should be further
@@ -1971,12 +1999,23 @@ static int sctp_verify_param(const struct sctp_association *asoc,
1971 case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: 1999 case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
1972 case SCTP_PARAM_ECN_CAPABLE: 2000 case SCTP_PARAM_ECN_CAPABLE:
1973 case SCTP_PARAM_ADAPTATION_LAYER_IND: 2001 case SCTP_PARAM_ADAPTATION_LAYER_IND:
2002 break;
2003
1974 case SCTP_PARAM_SUPPORTED_EXT: 2004 case SCTP_PARAM_SUPPORTED_EXT:
2005 if (!sctp_verify_ext_param(param))
2006 return SCTP_IERROR_ABORT;
1975 break; 2007 break;
1976 2008
2009 case SCTP_PARAM_SET_PRIMARY:
2010 if (sctp_addip_enable)
2011 break;
2012 goto fallthrough;
2013
1977 case SCTP_PARAM_HOST_NAME_ADDRESS: 2014 case SCTP_PARAM_HOST_NAME_ADDRESS:
1978 /* Tell the peer, we won't support this param. */ 2015 /* Tell the peer, we won't support this param. */
1979 return sctp_process_hn_param(asoc, param, chunk, err_chunk); 2016 sctp_process_hn_param(asoc, param, chunk, err_chunk);
2017 retval = SCTP_IERROR_ABORT;
2018 break;
1980 2019
1981 case SCTP_PARAM_FWD_TSN_SUPPORT: 2020 case SCTP_PARAM_FWD_TSN_SUPPORT:
1982 if (sctp_prsctp_enable) 2021 if (sctp_prsctp_enable)
@@ -1993,9 +2032,11 @@ static int sctp_verify_param(const struct sctp_association *asoc,
1993 * cause 'Protocol Violation'. 2032 * cause 'Protocol Violation'.
1994 */ 2033 */
1995 if (SCTP_AUTH_RANDOM_LENGTH != 2034 if (SCTP_AUTH_RANDOM_LENGTH !=
1996 ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) 2035 ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) {
1997 return sctp_process_inv_paramlength(asoc, param.p, 2036 sctp_process_inv_paramlength(asoc, param.p,
1998 chunk, err_chunk); 2037 chunk, err_chunk);
2038 retval = SCTP_IERROR_ABORT;
2039 }
1999 break; 2040 break;
2000 2041
2001 case SCTP_PARAM_CHUNKS: 2042 case SCTP_PARAM_CHUNKS:
@@ -2007,21 +2048,22 @@ static int sctp_verify_param(const struct sctp_association *asoc,
2007 * INIT-ACK chunk if the sender wants to receive authenticated 2048 * INIT-ACK chunk if the sender wants to receive authenticated
2008 * chunks. Its maximum length is 260 bytes. 2049 * chunks. Its maximum length is 260 bytes.
2009 */ 2050 */
2010 if (260 < ntohs(param.p->length)) 2051 if (260 < ntohs(param.p->length)) {
2011 return sctp_process_inv_paramlength(asoc, param.p, 2052 sctp_process_inv_paramlength(asoc, param.p,
2012 chunk, err_chunk); 2053 chunk, err_chunk);
2054 retval = SCTP_IERROR_ABORT;
2055 }
2013 break; 2056 break;
2014 2057
2015 case SCTP_PARAM_HMAC_ALGO: 2058 case SCTP_PARAM_HMAC_ALGO:
2016 if (!sctp_auth_enable) 2059 if (sctp_auth_enable)
2017 break; 2060 break;
2018 /* Fall Through */ 2061 /* Fall Through */
2019fallthrough: 2062fallthrough:
2020 default: 2063 default:
2021 SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", 2064 SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n",
2022 ntohs(param.p->type), cid); 2065 ntohs(param.p->type), cid);
2023 return sctp_process_unk_param(asoc, param, chunk, err_chunk); 2066 retval = sctp_process_unk_param(asoc, param, chunk, err_chunk);
2024
2025 break; 2067 break;
2026 } 2068 }
2027 return retval; 2069 return retval;
@@ -2036,6 +2078,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
2036{ 2078{
2037 union sctp_params param; 2079 union sctp_params param;
2038 int has_cookie = 0; 2080 int has_cookie = 0;
2081 int result;
2039 2082
2040 /* Verify stream values are non-zero. */ 2083 /* Verify stream values are non-zero. */
2041 if ((0 == peer_init->init_hdr.num_outbound_streams) || 2084 if ((0 == peer_init->init_hdr.num_outbound_streams) ||
@@ -2043,8 +2086,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
2043 (0 == peer_init->init_hdr.init_tag) || 2086 (0 == peer_init->init_hdr.init_tag) ||
2044 (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) { 2087 (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) {
2045 2088
2046 sctp_process_inv_mandatory(asoc, chunk, errp); 2089 return sctp_process_inv_mandatory(asoc, chunk, errp);
2047 return 0;
2048 } 2090 }
2049 2091
2050 /* Check for missing mandatory parameters. */ 2092 /* Check for missing mandatory parameters. */
@@ -2062,29 +2104,29 @@ int sctp_verify_init(const struct sctp_association *asoc,
2062 * VIOLATION error. We build the ERROR chunk here and let the normal 2104 * VIOLATION error. We build the ERROR chunk here and let the normal
2063 * error handling code build and send the packet. 2105 * error handling code build and send the packet.
2064 */ 2106 */
2065 if (param.v != (void*)chunk->chunk_end) { 2107 if (param.v != (void*)chunk->chunk_end)
2066 sctp_process_inv_paramlength(asoc, param.p, chunk, errp); 2108 return sctp_process_inv_paramlength(asoc, param.p, chunk, errp);
2067 return 0;
2068 }
2069 2109
2070 /* The only missing mandatory param possible today is 2110 /* The only missing mandatory param possible today is
2071 * the state cookie for an INIT-ACK chunk. 2111 * the state cookie for an INIT-ACK chunk.
2072 */ 2112 */
2073 if ((SCTP_CID_INIT_ACK == cid) && !has_cookie) { 2113 if ((SCTP_CID_INIT_ACK == cid) && !has_cookie)
2074 sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE, 2114 return sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE,
2075 chunk, errp); 2115 chunk, errp);
2076 return 0;
2077 }
2078
2079 /* Find unrecognized parameters. */
2080 2116
2117 /* Verify all the variable length parameters */
2081 sctp_walk_params(param, peer_init, init_hdr.params) { 2118 sctp_walk_params(param, peer_init, init_hdr.params) {
2082 2119
2083 if (!sctp_verify_param(asoc, param, cid, chunk, errp)) { 2120 result = sctp_verify_param(asoc, param, cid, chunk, errp);
2084 if (SCTP_PARAM_HOST_NAME_ADDRESS == param.p->type) 2121 switch (result) {
2122 case SCTP_IERROR_ABORT:
2123 case SCTP_IERROR_NOMEM:
2085 return 0; 2124 return 0;
2086 else 2125 case SCTP_IERROR_ERROR:
2087 return 1; 2126 return 1;
2127 case SCTP_IERROR_NO_ERROR:
2128 default:
2129 break;
2088 } 2130 }
2089 2131
2090 } /* for (loop through all parameters) */ 2132 } /* for (loop through all parameters) */
@@ -2134,14 +2176,19 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
2134 !asoc->peer.peer_hmacs)) 2176 !asoc->peer.peer_hmacs))
2135 asoc->peer.auth_capable = 0; 2177 asoc->peer.auth_capable = 0;
2136 2178
2137 2179 /* In a non-backward compatible mode, if the peer claims
2138 /* If the peer claims support for ADD-IP without support 2180 * support for ADD-IP but not AUTH, the ADD-IP spec states
2139 * for AUTH, disable support for ADD-IP. 2181 * that we MUST ABORT the association. Section 6. The section
2182 * also give us an option to silently ignore the packet, which
2183 * is what we'll do here.
2140 */ 2184 */
2141 if (asoc->peer.addip_capable && !asoc->peer.auth_capable) { 2185 if (!sctp_addip_noauth &&
2186 (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
2142 asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | 2187 asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP |
2143 SCTP_PARAM_DEL_IP | 2188 SCTP_PARAM_DEL_IP |
2144 SCTP_PARAM_SET_PRIMARY); 2189 SCTP_PARAM_SET_PRIMARY);
2190 asoc->peer.asconf_capable = 0;
2191 goto clean_up;
2145 } 2192 }
2146 2193
2147 /* Walk list of transports, removing transports in the UNKNOWN state. */ 2194 /* Walk list of transports, removing transports in the UNKNOWN state. */
@@ -2283,6 +2330,8 @@ static int sctp_process_param(struct sctp_association *asoc,
2283 sctp_scope_t scope; 2330 sctp_scope_t scope;
2284 time_t stale; 2331 time_t stale;
2285 struct sctp_af *af; 2332 struct sctp_af *af;
2333 union sctp_addr_param *addr_param;
2334 struct sctp_transport *t;
2286 2335
2287 /* We maintain all INIT parameters in network byte order all the 2336 /* We maintain all INIT parameters in network byte order all the
2288 * time. This allows us to not worry about whether the parameters 2337 * time. This allows us to not worry about whether the parameters
@@ -2373,6 +2422,26 @@ static int sctp_process_param(struct sctp_association *asoc,
2373 asoc->peer.adaptation_ind = param.aind->adaptation_ind; 2422 asoc->peer.adaptation_ind = param.aind->adaptation_ind;
2374 break; 2423 break;
2375 2424
2425 case SCTP_PARAM_SET_PRIMARY:
2426 addr_param = param.v + sizeof(sctp_addip_param_t);
2427
2428 af = sctp_get_af_specific(param_type2af(param.p->type));
2429 af->from_addr_param(&addr, addr_param,
2430 htons(asoc->peer.port), 0);
2431
2432 /* if the address is invalid, we can't process it.
2433 * XXX: see spec for what to do.
2434 */
2435 if (!af->addr_valid(&addr, NULL, NULL))
2436 break;
2437
2438 t = sctp_assoc_lookup_paddr(asoc, &addr);
2439 if (!t)
2440 break;
2441
2442 sctp_assoc_set_primary(asoc, t);
2443 break;
2444
2376 case SCTP_PARAM_SUPPORTED_EXT: 2445 case SCTP_PARAM_SUPPORTED_EXT:
2377 sctp_process_ext_param(asoc, param); 2446 sctp_process_ext_param(asoc, param);
2378 break; 2447 break;
@@ -2724,7 +2793,6 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2724 struct sctp_transport *peer; 2793 struct sctp_transport *peer;
2725 struct sctp_af *af; 2794 struct sctp_af *af;
2726 union sctp_addr addr; 2795 union sctp_addr addr;
2727 struct list_head *pos;
2728 union sctp_addr_param *addr_param; 2796 union sctp_addr_param *addr_param;
2729 2797
2730 addr_param = (union sctp_addr_param *) 2798 addr_param = (union sctp_addr_param *)
@@ -2735,8 +2803,24 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2735 return SCTP_ERROR_INV_PARAM; 2803 return SCTP_ERROR_INV_PARAM;
2736 2804
2737 af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); 2805 af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
2806
2807 /* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast
2808 * or multicast address.
2809 * (note: wildcard is permitted and requires special handling so
2810 * make sure we check for that)
2811 */
2812 if (!af->is_any(&addr) && !af->addr_valid(&addr, NULL, asconf->skb))
2813 return SCTP_ERROR_INV_PARAM;
2814
2738 switch (asconf_param->param_hdr.type) { 2815 switch (asconf_param->param_hdr.type) {
2739 case SCTP_PARAM_ADD_IP: 2816 case SCTP_PARAM_ADD_IP:
2817 /* Section 4.2.1:
2818 * If the address 0.0.0.0 or ::0 is provided, the source
2819 * address of the packet MUST be added.
2820 */
2821 if (af->is_any(&addr))
2822 memcpy(&addr, &asconf->source, sizeof(addr));
2823
2740 /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address 2824 /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
2741 * request and does not have the local resources to add this 2825 * request and does not have the local resources to add this
2742 * new address to the association, it MUST return an Error 2826 * new address to the association, it MUST return an Error
@@ -2758,8 +2842,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2758 * MUST send an Error Cause TLV with the error cause set to the 2842 * MUST send an Error Cause TLV with the error cause set to the
2759 * new error code 'Request to Delete Last Remaining IP Address'. 2843 * new error code 'Request to Delete Last Remaining IP Address'.
2760 */ 2844 */
2761 pos = asoc->peer.transport_addr_list.next; 2845 if (asoc->peer.transport_count == 1)
2762 if (pos->next == &asoc->peer.transport_addr_list)
2763 return SCTP_ERROR_DEL_LAST_IP; 2846 return SCTP_ERROR_DEL_LAST_IP;
2764 2847
2765 /* ADDIP 4.3 D8) If a request is received to delete an IP 2848 /* ADDIP 4.3 D8) If a request is received to delete an IP
@@ -2772,9 +2855,27 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2772 if (sctp_cmp_addr_exact(sctp_source(asconf), &addr)) 2855 if (sctp_cmp_addr_exact(sctp_source(asconf), &addr))
2773 return SCTP_ERROR_DEL_SRC_IP; 2856 return SCTP_ERROR_DEL_SRC_IP;
2774 2857
2775 sctp_assoc_del_peer(asoc, &addr); 2858 /* Section 4.2.2
2859 * If the address 0.0.0.0 or ::0 is provided, all
2860 * addresses of the peer except the source address of the
2861 * packet MUST be deleted.
2862 */
2863 if (af->is_any(&addr)) {
2864 sctp_assoc_set_primary(asoc, asconf->transport);
2865 sctp_assoc_del_nonprimary_peers(asoc,
2866 asconf->transport);
2867 } else
2868 sctp_assoc_del_peer(asoc, &addr);
2776 break; 2869 break;
2777 case SCTP_PARAM_SET_PRIMARY: 2870 case SCTP_PARAM_SET_PRIMARY:
2871 /* ADDIP Section 4.2.4
2872 * If the address 0.0.0.0 or ::0 is provided, the receiver
2873 * MAY mark the source address of the packet as its
2874 * primary.
2875 */
2876 if (af->is_any(&addr))
2877 memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
2878
2778 peer = sctp_assoc_lookup_paddr(asoc, &addr); 2879 peer = sctp_assoc_lookup_paddr(asoc, &addr);
2779 if (!peer) 2880 if (!peer)
2780 return SCTP_ERROR_INV_PARAM; 2881 return SCTP_ERROR_INV_PARAM;
@@ -2848,10 +2949,11 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
2848 2949
2849 __be16 err_code; 2950 __be16 err_code;
2850 int length = 0; 2951 int length = 0;
2851 int chunk_len = asconf->skb->len; 2952 int chunk_len;
2852 __u32 serial; 2953 __u32 serial;
2853 int all_param_pass = 1; 2954 int all_param_pass = 1;
2854 2955
2956 chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
2855 hdr = (sctp_addiphdr_t *)asconf->skb->data; 2957 hdr = (sctp_addiphdr_t *)asconf->skb->data;
2856 serial = ntohl(hdr->serial); 2958 serial = ntohl(hdr->serial);
2857 2959
@@ -2861,7 +2963,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
2861 chunk_len -= length; 2963 chunk_len -= length;
2862 2964
2863 /* Skip the address parameter and store a pointer to the first 2965 /* Skip the address parameter and store a pointer to the first
2864 * asconf paramter. 2966 * asconf parameter.
2865 */ 2967 */
2866 length = ntohs(addr_param->v4.param_hdr.length); 2968 length = ntohs(addr_param->v4.param_hdr.length);
2867 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); 2969 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
@@ -2870,7 +2972,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
2870 /* create an ASCONF_ACK chunk. 2972 /* create an ASCONF_ACK chunk.
2871 * Based on the definitions of parameters, we know that the size of 2973 * Based on the definitions of parameters, we know that the size of
2872 * ASCONF_ACK parameters are less than or equal to the twice of ASCONF 2974 * ASCONF_ACK parameters are less than or equal to the twice of ASCONF
2873 * paramters. 2975 * parameters.
2874 */ 2976 */
2875 asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2); 2977 asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2);
2876 if (!asconf_ack) 2978 if (!asconf_ack)
@@ -2917,11 +3019,9 @@ done:
2917 * after freeing the reference to old asconf ack if any. 3019 * after freeing the reference to old asconf ack if any.
2918 */ 3020 */
2919 if (asconf_ack) { 3021 if (asconf_ack) {
2920 if (asoc->addip_last_asconf_ack)
2921 sctp_chunk_free(asoc->addip_last_asconf_ack);
2922
2923 sctp_chunk_hold(asconf_ack); 3022 sctp_chunk_hold(asconf_ack);
2924 asoc->addip_last_asconf_ack = asconf_ack; 3023 list_add_tail(&asconf_ack->transmitted_list,
3024 &asoc->asconf_ack_list);
2925 } 3025 }
2926 3026
2927 return asconf_ack; 3027 return asconf_ack;
@@ -2952,13 +3052,17 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
2952 /* This is always done in BH context with a socket lock 3052 /* This is always done in BH context with a socket lock
2953 * held, so the list can not change. 3053 * held, so the list can not change.
2954 */ 3054 */
3055 local_bh_disable();
2955 list_for_each_entry(saddr, &bp->address_list, list) { 3056 list_for_each_entry(saddr, &bp->address_list, list) {
2956 if (sctp_cmp_addr_exact(&saddr->a, &addr)) 3057 if (sctp_cmp_addr_exact(&saddr->a, &addr))
2957 saddr->use_as_src = 1; 3058 saddr->state = SCTP_ADDR_SRC;
2958 } 3059 }
3060 local_bh_enable();
2959 break; 3061 break;
2960 case SCTP_PARAM_DEL_IP: 3062 case SCTP_PARAM_DEL_IP:
2961 retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh); 3063 local_bh_disable();
3064 retval = sctp_del_bind_addr(bp, &addr);
3065 local_bh_enable();
2962 list_for_each(pos, &asoc->peer.transport_addr_list) { 3066 list_for_each(pos, &asoc->peer.transport_addr_list) {
2963 transport = list_entry(pos, struct sctp_transport, 3067 transport = list_entry(pos, struct sctp_transport,
2964 transports); 3068 transports);
@@ -2990,7 +3094,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
2990 sctp_addip_param_t *asconf_ack_param; 3094 sctp_addip_param_t *asconf_ack_param;
2991 sctp_errhdr_t *err_param; 3095 sctp_errhdr_t *err_param;
2992 int length; 3096 int length;
2993 int asconf_ack_len = asconf_ack->skb->len; 3097 int asconf_ack_len;
2994 __be16 err_code; 3098 __be16 err_code;
2995 3099
2996 if (no_err) 3100 if (no_err)
@@ -2998,6 +3102,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
2998 else 3102 else
2999 err_code = SCTP_ERROR_REQ_REFUSED; 3103 err_code = SCTP_ERROR_REQ_REFUSED;
3000 3104
3105 asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) -
3106 sizeof(sctp_chunkhdr_t);
3107
3001 /* Skip the addiphdr from the asconf_ack chunk and store a pointer to 3108 /* Skip the addiphdr from the asconf_ack chunk and store a pointer to
3002 * the first asconf_ack parameter. 3109 * the first asconf_ack parameter.
3003 */ 3110 */
@@ -3057,7 +3164,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
3057 asconf_len -= length; 3164 asconf_len -= length;
3058 3165
3059 /* Skip the address parameter in the last asconf sent and store a 3166 /* Skip the address parameter in the last asconf sent and store a
3060 * pointer to the first asconf paramter. 3167 * pointer to the first asconf parameter.
3061 */ 3168 */
3062 length = ntohs(addr_param->v4.param_hdr.length); 3169 length = ntohs(addr_param->v4.param_hdr.length);
3063 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); 3170 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index bbdc938da86f..78d1a8a49bd0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -453,6 +453,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
453 * maximum value discussed in rule C7 above (RTO.max) may be 453 * maximum value discussed in rule C7 above (RTO.max) may be
454 * used to provide an upper bound to this doubling operation. 454 * used to provide an upper bound to this doubling operation.
455 */ 455 */
456 transport->last_rto = transport->rto;
456 transport->rto = min((transport->rto * 2), transport->asoc->rto_max); 457 transport->rto = min((transport->rto * 2), transport->asoc->rto_max);
457} 458}
458 459
@@ -1267,6 +1268,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1267 sctp_ootb_pkt_free(packet); 1268 sctp_ootb_pkt_free(packet);
1268 break; 1269 break;
1269 1270
1271 case SCTP_CMD_T1_RETRAN:
1272 /* Mark a transport for retransmission. */
1273 sctp_retransmit(&asoc->outqueue, cmd->obj.transport,
1274 SCTP_RTXR_T1_RTX);
1275 break;
1276
1270 case SCTP_CMD_RETRAN: 1277 case SCTP_CMD_RETRAN:
1271 /* Mark a transport for retransmission. */ 1278 /* Mark a transport for retransmission. */
1272 sctp_retransmit(&asoc->outqueue, cmd->obj.transport, 1279 sctp_retransmit(&asoc->outqueue, cmd->obj.transport,
@@ -1393,7 +1400,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1393 list_for_each(pos, &asoc->peer.transport_addr_list) { 1400 list_for_each(pos, &asoc->peer.transport_addr_list) {
1394 t = list_entry(pos, struct sctp_transport, 1401 t = list_entry(pos, struct sctp_transport,
1395 transports); 1402 transports);
1396 sctp_retransmit_mark(&asoc->outqueue, t, 0); 1403 sctp_retransmit_mark(&asoc->outqueue, t,
1404 SCTP_RTXR_T1_RTX);
1397 } 1405 }
1398 1406
1399 sctp_add_cmd_sf(commands, 1407 sctp_add_cmd_sf(commands,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f01b408508ff..61cbd5a8dd0c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -143,6 +143,12 @@ static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
143 const sctp_subtype_t type, 143 const sctp_subtype_t type,
144 struct sctp_chunk *chunk); 144 struct sctp_chunk *chunk);
145 145
146static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
147 const struct sctp_association *asoc,
148 const sctp_subtype_t type,
149 void *arg,
150 sctp_cmd_seq_t *commands);
151
146/* Small helper function that checks if the chunk length 152/* Small helper function that checks if the chunk length
147 * is of the appropriate length. The 'required_length' argument 153 * is of the appropriate length. The 'required_length' argument
148 * is set to be the size of a specific chunk we are testing. 154 * is set to be the size of a specific chunk we are testing.
@@ -475,7 +481,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
475 sctp_init_chunk_t *initchunk; 481 sctp_init_chunk_t *initchunk;
476 struct sctp_chunk *err_chunk; 482 struct sctp_chunk *err_chunk;
477 struct sctp_packet *packet; 483 struct sctp_packet *packet;
478 sctp_error_t error;
479 484
480 if (!sctp_vtag_verify(chunk, asoc)) 485 if (!sctp_vtag_verify(chunk, asoc))
481 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 486 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -500,8 +505,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
500 (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, 505 (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
501 &err_chunk)) { 506 &err_chunk)) {
502 507
508 sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
509
503 /* This chunk contains fatal error. It is to be discarded. 510 /* This chunk contains fatal error. It is to be discarded.
504 * Send an ABORT, with causes if there is any. 511 * Send an ABORT, with causes. If there are no causes,
512 * then there wasn't enough memory. Just terminate
513 * the association.
505 */ 514 */
506 if (err_chunk) { 515 if (err_chunk) {
507 packet = sctp_abort_pkt_new(ep, asoc, arg, 516 packet = sctp_abort_pkt_new(ep, asoc, arg,
@@ -517,12 +526,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
517 SCTP_PACKET(packet)); 526 SCTP_PACKET(packet));
518 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 527 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
519 error = SCTP_ERROR_INV_PARAM; 528 error = SCTP_ERROR_INV_PARAM;
520 } else {
521 error = SCTP_ERROR_NO_RESOURCE;
522 } 529 }
523 } else {
524 sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
525 error = SCTP_ERROR_INV_PARAM;
526 } 530 }
527 531
528 /* SCTP-AUTH, Section 6.3: 532 /* SCTP-AUTH, Section 6.3:
@@ -959,7 +963,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
959{ 963{
960 struct sctp_transport *transport = (struct sctp_transport *) arg; 964 struct sctp_transport *transport = (struct sctp_transport *) arg;
961 965
962 if (asoc->overall_error_count >= asoc->max_retrans) { 966 if (asoc->overall_error_count > asoc->max_retrans) {
963 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, 967 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
964 SCTP_ERROR(ETIMEDOUT)); 968 SCTP_ERROR(ETIMEDOUT));
965 /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ 969 /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
@@ -1146,7 +1150,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1146 /* Check if the timestamp looks valid. */ 1150 /* Check if the timestamp looks valid. */
1147 if (time_after(hbinfo->sent_at, jiffies) || 1151 if (time_after(hbinfo->sent_at, jiffies) ||
1148 time_after(jiffies, hbinfo->sent_at + max_interval)) { 1152 time_after(jiffies, hbinfo->sent_at + max_interval)) {
1149 SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp" 1153 SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp "
1150 "received for transport: %p\n", 1154 "received for transport: %p\n",
1151 __FUNCTION__, link); 1155 __FUNCTION__, link);
1152 return SCTP_DISPOSITION_DISCARD; 1156 return SCTP_DISPOSITION_DISCARD;
@@ -1309,26 +1313,6 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
1309 new_asoc->c.initial_tsn = asoc->c.initial_tsn; 1313 new_asoc->c.initial_tsn = asoc->c.initial_tsn;
1310} 1314}
1311 1315
1312static void sctp_auth_params_populate(struct sctp_association *new_asoc,
1313 const struct sctp_association *asoc)
1314{
1315 /* Only perform this if AUTH extension is enabled */
1316 if (!sctp_auth_enable)
1317 return;
1318
1319 /* We need to provide the same parameter information as
1320 * was in the original INIT. This means that we need to copy
1321 * the HMACS, CHUNKS, and RANDOM parameter from the original
1322 * assocaition.
1323 */
1324 memcpy(new_asoc->c.auth_random, asoc->c.auth_random,
1325 sizeof(asoc->c.auth_random));
1326 memcpy(new_asoc->c.auth_hmacs, asoc->c.auth_hmacs,
1327 sizeof(asoc->c.auth_hmacs));
1328 memcpy(new_asoc->c.auth_chunks, asoc->c.auth_chunks,
1329 sizeof(asoc->c.auth_chunks));
1330}
1331
1332/* 1316/*
1333 * Compare vtag/tietag values to determine unexpected COOKIE-ECHO 1317 * Compare vtag/tietag values to determine unexpected COOKIE-ECHO
1334 * handling action. 1318 * handling action.
@@ -1486,8 +1470,6 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
1486 1470
1487 sctp_tietags_populate(new_asoc, asoc); 1471 sctp_tietags_populate(new_asoc, asoc);
1488 1472
1489 sctp_auth_params_populate(new_asoc, asoc);
1490
1491 /* B) "Z" shall respond immediately with an INIT ACK chunk. */ 1473 /* B) "Z" shall respond immediately with an INIT ACK chunk. */
1492 1474
1493 /* If there are errors need to be reported for unknown parameters, 1475 /* If there are errors need to be reported for unknown parameters,
@@ -2095,11 +2077,20 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
2095 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2077 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2096 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2078 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2097 2079
2080 /* ADD-IP: Special case for ABORT chunks
2081 * F4) One special consideration is that ABORT Chunks arriving
2082 * destined to the IP address being deleted MUST be
2083 * ignored (see Section 5.3.1 for further details).
2084 */
2085 if (SCTP_ADDR_DEL ==
2086 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2087 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2088
2098 /* Stop the T5-shutdown guard timer. */ 2089 /* Stop the T5-shutdown guard timer. */
2099 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2090 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2100 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 2091 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
2101 2092
2102 return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); 2093 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2103} 2094}
2104 2095
2105/* 2096/*
@@ -2131,6 +2122,15 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
2131 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2122 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2132 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2123 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2133 2124
2125 /* ADD-IP: Special case for ABORT chunks
2126 * F4) One special consideration is that ABORT Chunks arriving
2127 * destined to the IP address being deleted MUST be
2128 * ignored (see Section 5.3.1 for further details).
2129 */
2130 if (SCTP_ADDR_DEL ==
2131 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2132 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2133
2134 /* Stop the T2-shutdown timer. */ 2134 /* Stop the T2-shutdown timer. */
2135 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2135 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2136 SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); 2136 SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2139,7 +2139,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
2139 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2139 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2140 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 2140 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
2141 2141
2142 return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); 2142 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2143} 2143}
2144 2144
2145/* 2145/*
@@ -2305,7 +2305,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
2305 /* If we've sent any data bundled with COOKIE-ECHO we will need to 2305 /* If we've sent any data bundled with COOKIE-ECHO we will need to
2306 * resend 2306 * resend
2307 */ 2307 */
2308 sctp_add_cmd_sf(commands, SCTP_CMD_RETRAN, 2308 sctp_add_cmd_sf(commands, SCTP_CMD_T1_RETRAN,
2309 SCTP_TRANSPORT(asoc->peer.primary_path)); 2309 SCTP_TRANSPORT(asoc->peer.primary_path));
2310 2310
2311 /* Cast away the const modifier, as we want to just 2311 /* Cast away the const modifier, as we want to just
@@ -2366,8 +2366,6 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2366 sctp_cmd_seq_t *commands) 2366 sctp_cmd_seq_t *commands)
2367{ 2367{
2368 struct sctp_chunk *chunk = arg; 2368 struct sctp_chunk *chunk = arg;
2369 unsigned len;
2370 __be16 error = SCTP_ERROR_NO_ERROR;
2371 2369
2372 if (!sctp_vtag_verify_either(chunk, asoc)) 2370 if (!sctp_vtag_verify_either(chunk, asoc))
2373 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2371 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2385,6 +2383,28 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2385 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2383 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2386 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2384 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2387 2385
2386 /* ADD-IP: Special case for ABORT chunks
2387 * F4) One special consideration is that ABORT Chunks arriving
2388 * destined to the IP address being deleted MUST be
2389 * ignored (see Section 5.3.1 for further details).
2390 */
2391 if (SCTP_ADDR_DEL ==
2392 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2393 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2394
2395 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2396}
2397
2398static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2399 const struct sctp_association *asoc,
2400 const sctp_subtype_t type,
2401 void *arg,
2402 sctp_cmd_seq_t *commands)
2403{
2404 struct sctp_chunk *chunk = arg;
2405 unsigned len;
2406 __be16 error = SCTP_ERROR_NO_ERROR;
2407
2388 /* See if we have an error cause code in the chunk. */ 2408 /* See if we have an error cause code in the chunk. */
2389 len = ntohs(chunk->chunk_hdr->length); 2409 len = ntohs(chunk->chunk_hdr->length);
2390 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2410 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
@@ -3399,6 +3419,15 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
3399 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3419 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3400 } 3420 }
3401 3421
3422 /* ADD-IP: Section 4.1.1
3423 * This chunk MUST be sent in an authenticated way by using
3424 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
3425 * is received unauthenticated it MUST be silently discarded as
3426 * described in [I-D.ietf-tsvwg-sctp-auth].
3427 */
3428 if (!sctp_addip_noauth && !chunk->auth)
3429 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
3430
3402 /* Make sure that the ASCONF ADDIP chunk has a valid length. */ 3431 /* Make sure that the ASCONF ADDIP chunk has a valid length. */
3403 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) 3432 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
3404 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3433 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
@@ -3415,48 +3444,68 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
3415 3444
3416 /* Verify the ASCONF chunk before processing it. */ 3445 /* Verify the ASCONF chunk before processing it. */
3417 if (!sctp_verify_asconf(asoc, 3446 if (!sctp_verify_asconf(asoc,
3418 (sctp_paramhdr_t *)((void *)addr_param + length), 3447 (sctp_paramhdr_t *)((void *)addr_param + length),
3419 (void *)chunk->chunk_end, 3448 (void *)chunk->chunk_end,
3420 &err_param)) 3449 &err_param))
3421 return sctp_sf_violation_paramlen(ep, asoc, type, 3450 return sctp_sf_violation_paramlen(ep, asoc, type,
3422 (void *)&err_param, commands); 3451 (void *)&err_param, commands);
3423 3452
3424 /* ADDIP 4.2 C1) Compare the value of the serial number to the value 3453 /* ADDIP 5.2 E1) Compare the value of the serial number to the value
3425 * the endpoint stored in a new association variable 3454 * the endpoint stored in a new association variable
3426 * 'Peer-Serial-Number'. 3455 * 'Peer-Serial-Number'.
3427 */ 3456 */
3428 if (serial == asoc->peer.addip_serial + 1) { 3457 if (serial == asoc->peer.addip_serial + 1) {
3429 /* ADDIP 4.2 C2) If the value found in the serial number is 3458 /* If this is the first instance of ASCONF in the packet,
3430 * equal to the ('Peer-Serial-Number' + 1), the endpoint MUST 3459 * we can clean our old ASCONF-ACKs.
3431 * do V1-V5. 3460 */
3461 if (!chunk->has_asconf)
3462 sctp_assoc_clean_asconf_ack_cache(asoc);
3463
3464 /* ADDIP 5.2 E4) When the Sequence Number matches the next one
3465 * expected, process the ASCONF as described below and after
3466 * processing the ASCONF Chunk, append an ASCONF-ACK Chunk to
3467 * the response packet and cache a copy of it (in the event it
3468 * later needs to be retransmitted).
3469 *
3470 * Essentially, do V1-V5.
3432 */ 3471 */
3433 asconf_ack = sctp_process_asconf((struct sctp_association *) 3472 asconf_ack = sctp_process_asconf((struct sctp_association *)
3434 asoc, chunk); 3473 asoc, chunk);
3435 if (!asconf_ack) 3474 if (!asconf_ack)
3436 return SCTP_DISPOSITION_NOMEM; 3475 return SCTP_DISPOSITION_NOMEM;
3437 } else if (serial == asoc->peer.addip_serial) { 3476 } else if (serial < asoc->peer.addip_serial + 1) {
3438 /* ADDIP 4.2 C3) If the value found in the serial number is 3477 /* ADDIP 5.2 E2)
3439 * equal to the value stored in the 'Peer-Serial-Number' 3478 * If the value found in the Sequence Number is less than the
3440 * IMPLEMENTATION NOTE: As an optimization a receiver may wish 3479 * ('Peer- Sequence-Number' + 1), simply skip to the next
3441 * to save the last ASCONF-ACK for some predetermined period of 3480 * ASCONF, and include in the outbound response packet
3442 * time and instead of re-processing the ASCONF (with the same 3481 * any previously cached ASCONF-ACK response that was
3443 * serial number) it may just re-transmit the ASCONF-ACK. 3482 * sent and saved that matches the Sequence Number of the
3483 * ASCONF. Note: It is possible that no cached ASCONF-ACK
3484 * Chunk exists. This will occur when an older ASCONF
3485 * arrives out of order. In such a case, the receiver
3486 * should skip the ASCONF Chunk and not include ASCONF-ACK
3487 * Chunk for that chunk.
3444 */ 3488 */
3445 if (asoc->addip_last_asconf_ack) 3489 asconf_ack = sctp_assoc_lookup_asconf_ack(asoc, hdr->serial);
3446 asconf_ack = asoc->addip_last_asconf_ack; 3490 if (!asconf_ack)
3447 else
3448 return SCTP_DISPOSITION_DISCARD; 3491 return SCTP_DISPOSITION_DISCARD;
3449 } else { 3492 } else {
3450 /* ADDIP 4.2 C4) Otherwise, the ASCONF Chunk is discarded since 3493 /* ADDIP 5.2 E5) Otherwise, the ASCONF Chunk is discarded since
3451 * it must be either a stale packet or from an attacker. 3494 * it must be either a stale packet or from an attacker.
3452 */ 3495 */
3453 return SCTP_DISPOSITION_DISCARD; 3496 return SCTP_DISPOSITION_DISCARD;
3454 } 3497 }
3455 3498
3456 /* ADDIP 4.2 C5) In both cases C2 and C3 the ASCONF-ACK MUST be sent 3499 /* ADDIP 5.2 E6) The destination address of the SCTP packet
3457 * back to the source address contained in the IP header of the ASCONF 3500 * containing the ASCONF-ACK Chunks MUST be the source address of
3458 * being responded to. 3501 * the SCTP packet that held the ASCONF Chunks.
3502 *
3503 * To do this properly, we'll set the destination address of the chunk
3504 * and at the transmit time, will try look up the transport to use.
3505 * Since ASCONFs may be bundled, the correct transport may not be
3506 * created untill we process the entire packet, thus this workaround.
3459 */ 3507 */
3508 asconf_ack->dest = chunk->source;
3460 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); 3509 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
3461 3510
3462 return SCTP_DISPOSITION_CONSUME; 3511 return SCTP_DISPOSITION_CONSUME;
@@ -3485,6 +3534,15 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
3485 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3534 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3486 } 3535 }
3487 3536
3537 /* ADD-IP, Section 4.1.2:
3538 * This chunk MUST be sent in an authenticated way by using
3539 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
3540 * is received unauthenticated it MUST be silently discarded as
3541 * described in [I-D.ietf-tsvwg-sctp-auth].
3542 */
3543 if (!sctp_addip_noauth && !asconf_ack->auth)
3544 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
3545
3488 /* Make sure that the ADDIP chunk has a valid length. */ 3546 /* Make sure that the ADDIP chunk has a valid length. */
3489 if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) 3547 if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
3490 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3548 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
@@ -4064,11 +4122,6 @@ static sctp_disposition_t sctp_sf_abort_violation(
4064 struct sctp_chunk *chunk = arg; 4122 struct sctp_chunk *chunk = arg;
4065 struct sctp_chunk *abort = NULL; 4123 struct sctp_chunk *abort = NULL;
4066 4124
4067 /* Make the abort chunk. */
4068 abort = sctp_make_abort_violation(asoc, chunk, payload, paylen);
4069 if (!abort)
4070 goto nomem;
4071
4072 /* SCTP-AUTH, Section 6.3: 4125 /* SCTP-AUTH, Section 6.3:
4073 * It should be noted that if the receiver wants to tear 4126 * It should be noted that if the receiver wants to tear
4074 * down an association in an authenticated way only, the 4127 * down an association in an authenticated way only, the
@@ -4083,6 +4136,11 @@ static sctp_disposition_t sctp_sf_abort_violation(
4083 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) 4136 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
4084 goto discard; 4137 goto discard;
4085 4138
4139 /* Make the abort chunk. */
4140 abort = sctp_make_abort_violation(asoc, chunk, payload, paylen);
4141 if (!abort)
4142 goto nomem;
4143
4086 if (asoc) { 4144 if (asoc) {
4087 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); 4145 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
4088 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 4146 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -5785,7 +5843,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5785 /* 5843 /*
5786 * Also try to renege to limit our memory usage in the event that 5844 * Also try to renege to limit our memory usage in the event that
5787 * we are under memory pressure 5845 * we are under memory pressure
5788 * If we can't renege, don't worry about it, the sk_stream_rmem_schedule 5846 * If we can't renege, don't worry about it, the sk_rmem_schedule
5789 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our 5847 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
5790 * memory usage too much 5848 * memory usage too much
5791 */ 5849 */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index a93a4bc8f68f..e6016e41ffa0 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -457,11 +457,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
457 /* SCTP_STATE_ESTABLISHED */ \ 457 /* SCTP_STATE_ESTABLISHED */ \
458 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \ 458 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
459 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 459 /* SCTP_STATE_SHUTDOWN_PENDING */ \
460 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 460 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
461 /* SCTP_STATE_SHUTDOWN_SENT */ \ 461 /* SCTP_STATE_SHUTDOWN_SENT */ \
462 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 462 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
463 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 463 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
464 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 464 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
465 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 465 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
466 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 466 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
467} /* TYPE_SCTP_ASCONF */ 467} /* TYPE_SCTP_ASCONF */
@@ -478,11 +478,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
478 /* SCTP_STATE_ESTABLISHED */ \ 478 /* SCTP_STATE_ESTABLISHED */ \
479 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \ 479 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
480 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 480 /* SCTP_STATE_SHUTDOWN_PENDING */ \
481 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 481 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
482 /* SCTP_STATE_SHUTDOWN_SENT */ \ 482 /* SCTP_STATE_SHUTDOWN_SENT */ \
483 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 483 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
484 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 484 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
485 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 485 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
486 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 486 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
487 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 487 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
488} /* TYPE_SCTP_ASCONF_ACK */ 488} /* TYPE_SCTP_ASCONF_ACK */
@@ -691,11 +691,11 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
691 /* SCTP_STATE_ESTABLISHED */ \ 691 /* SCTP_STATE_ESTABLISHED */ \
692 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \ 692 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
693 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 693 /* SCTP_STATE_SHUTDOWN_PENDING */ \
694 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 694 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
695 /* SCTP_STATE_SHUTDOWN_SENT */ \ 695 /* SCTP_STATE_SHUTDOWN_SENT */ \
696 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 696 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
697 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 697 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
698 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 698 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
699 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 699 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
700 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 700 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
701} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */ 701} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bd6f42a15a4b..710df67a6785 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -174,7 +174,8 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
174 sizeof(struct sctp_chunk); 174 sizeof(struct sctp_chunk);
175 175
176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
177 sk_charge_skb(sk, chunk->skb); 177 sk->sk_wmem_queued += chunk->skb->truesize;
178 sk_mem_charge(sk, chunk->skb->truesize);
178} 179}
179 180
180/* Verify that this is a valid address. */ 181/* Verify that this is a valid address. */
@@ -390,7 +391,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
390 /* Add the address to the bind address list. 391 /* Add the address to the bind address list.
391 * Use GFP_ATOMIC since BHs will be disabled. 392 * Use GFP_ATOMIC since BHs will be disabled.
392 */ 393 */
393 ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); 394 ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC);
394 395
395 /* Copy back into socket for getsockname() use. */ 396 /* Copy back into socket for getsockname() use. */
396 if (!ret) { 397 if (!ret) {
@@ -585,8 +586,8 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
585 addr = (union sctp_addr *)addr_buf; 586 addr = (union sctp_addr *)addr_buf;
586 af = sctp_get_af_specific(addr->v4.sin_family); 587 af = sctp_get_af_specific(addr->v4.sin_family);
587 memcpy(&saveaddr, addr, af->sockaddr_len); 588 memcpy(&saveaddr, addr, af->sockaddr_len);
588 retval = sctp_add_bind_addr(bp, &saveaddr, 0, 589 retval = sctp_add_bind_addr(bp, &saveaddr,
589 GFP_ATOMIC); 590 SCTP_ADDR_NEW, GFP_ATOMIC);
590 addr_buf += af->sockaddr_len; 591 addr_buf += af->sockaddr_len;
591 } 592 }
592 } 593 }
@@ -660,7 +661,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
660 * socket routing and failover schemes. Refer to comments in 661 * socket routing and failover schemes. Refer to comments in
661 * sctp_do_bind(). -daisy 662 * sctp_do_bind(). -daisy
662 */ 663 */
663 retval = sctp_del_bind_addr(bp, sa_addr, call_rcu); 664 retval = sctp_del_bind_addr(bp, sa_addr);
664 665
665 addr_buf += af->sockaddr_len; 666 addr_buf += af->sockaddr_len;
666err_bindx_rem: 667err_bindx_rem:
@@ -777,7 +778,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
777 af = sctp_get_af_specific(laddr->v4.sin_family); 778 af = sctp_get_af_specific(laddr->v4.sin_family);
778 list_for_each_entry(saddr, &bp->address_list, list) { 779 list_for_each_entry(saddr, &bp->address_list, list) {
779 if (sctp_cmp_addr_exact(&saddr->a, laddr)) 780 if (sctp_cmp_addr_exact(&saddr->a, laddr))
780 saddr->use_as_src = 0; 781 saddr->state = SCTP_ADDR_DEL;
781 } 782 }
782 addr_buf += af->sockaddr_len; 783 addr_buf += af->sockaddr_len;
783 } 784 }
@@ -5307,6 +5308,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
5307{ 5308{
5308 struct sctp_bind_hashbucket *head; /* hash list */ 5309 struct sctp_bind_hashbucket *head; /* hash list */
5309 struct sctp_bind_bucket *pp; /* hash list port iterator */ 5310 struct sctp_bind_bucket *pp; /* hash list port iterator */
5311 struct hlist_node *node;
5310 unsigned short snum; 5312 unsigned short snum;
5311 int ret; 5313 int ret;
5312 5314
@@ -5331,7 +5333,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
5331 index = sctp_phashfn(rover); 5333 index = sctp_phashfn(rover);
5332 head = &sctp_port_hashtable[index]; 5334 head = &sctp_port_hashtable[index];
5333 sctp_spin_lock(&head->lock); 5335 sctp_spin_lock(&head->lock);
5334 for (pp = head->chain; pp; pp = pp->next) 5336 sctp_for_each_hentry(pp, node, &head->chain)
5335 if (pp->port == rover) 5337 if (pp->port == rover)
5336 goto next; 5338 goto next;
5337 break; 5339 break;
@@ -5358,7 +5360,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
5358 */ 5360 */
5359 head = &sctp_port_hashtable[sctp_phashfn(snum)]; 5361 head = &sctp_port_hashtable[sctp_phashfn(snum)];
5360 sctp_spin_lock(&head->lock); 5362 sctp_spin_lock(&head->lock);
5361 for (pp = head->chain; pp; pp = pp->next) { 5363 sctp_for_each_hentry(pp, node, &head->chain) {
5362 if (pp->port == snum) 5364 if (pp->port == snum)
5363 goto pp_found; 5365 goto pp_found;
5364 } 5366 }
@@ -5702,10 +5704,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
5702 pp->port = snum; 5704 pp->port = snum;
5703 pp->fastreuse = 0; 5705 pp->fastreuse = 0;
5704 INIT_HLIST_HEAD(&pp->owner); 5706 INIT_HLIST_HEAD(&pp->owner);
5705 if ((pp->next = head->chain) != NULL) 5707 hlist_add_head(&pp->node, &head->chain);
5706 pp->next->pprev = &pp->next;
5707 head->chain = pp;
5708 pp->pprev = &head->chain;
5709 } 5708 }
5710 return pp; 5709 return pp;
5711} 5710}
@@ -5714,9 +5713,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
5714static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) 5713static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
5715{ 5714{
5716 if (pp && hlist_empty(&pp->owner)) { 5715 if (pp && hlist_empty(&pp->owner)) {
5717 if (pp->next) 5716 __hlist_del(&pp->node);
5718 pp->next->pprev = pp->pprev;
5719 *(pp->pprev) = pp->next;
5720 kmem_cache_free(sctp_bucket_cachep, pp); 5717 kmem_cache_free(sctp_bucket_cachep, pp);
5721 SCTP_DBG_OBJCNT_DEC(bind_bucket); 5718 SCTP_DBG_OBJCNT_DEC(bind_bucket);
5722 } 5719 }
@@ -6012,7 +6009,8 @@ static void __sctp_write_space(struct sctp_association *asoc)
6012 */ 6009 */
6013 if (sock->fasync_list && 6010 if (sock->fasync_list &&
6014 !(sk->sk_shutdown & SEND_SHUTDOWN)) 6011 !(sk->sk_shutdown & SEND_SHUTDOWN))
6015 sock_wake_async(sock, 2, POLL_OUT); 6012 sock_wake_async(sock,
6013 SOCK_WAKE_SPACE, POLL_OUT);
6016 } 6014 }
6017 } 6015 }
6018} 6016}
@@ -6038,10 +6036,10 @@ static void sctp_wfree(struct sk_buff *skb)
6038 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 6036 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
6039 6037
6040 /* 6038 /*
6041 * This undoes what is done via sk_charge_skb 6039 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
6042 */ 6040 */
6043 sk->sk_wmem_queued -= skb->truesize; 6041 sk->sk_wmem_queued -= skb->truesize;
6044 sk->sk_forward_alloc += skb->truesize; 6042 sk_mem_uncharge(sk, skb->truesize);
6045 6043
6046 sock_wfree(skb); 6044 sock_wfree(skb);
6047 __sctp_write_space(asoc); 6045 __sctp_write_space(asoc);
@@ -6062,9 +6060,9 @@ void sctp_sock_rfree(struct sk_buff *skb)
6062 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); 6060 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
6063 6061
6064 /* 6062 /*
6065 * Mimic the behavior of sk_stream_rfree 6063 * Mimic the behavior of sock_rfree
6066 */ 6064 */
6067 sk->sk_forward_alloc += event->rmem_len; 6065 sk_mem_uncharge(sk, event->rmem_len);
6068} 6066}
6069 6067
6070 6068
@@ -6329,7 +6327,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
6329 struct sctp_endpoint *newep = newsp->ep; 6327 struct sctp_endpoint *newep = newsp->ep;
6330 struct sk_buff *skb, *tmp; 6328 struct sk_buff *skb, *tmp;
6331 struct sctp_ulpevent *event; 6329 struct sctp_ulpevent *event;
6332 int flags = 0; 6330 struct sctp_bind_hashbucket *head;
6333 6331
6334 /* Migrate socket buffer sizes and all the socket level options to the 6332 /* Migrate socket buffer sizes and all the socket level options to the
6335 * new socket. 6333 * new socket.
@@ -6346,23 +6344,21 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
6346 newsp->hmac = NULL; 6344 newsp->hmac = NULL;
6347 6345
6348 /* Hook this new socket in to the bind_hash list. */ 6346 /* Hook this new socket in to the bind_hash list. */
6347 head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)];
6348 sctp_local_bh_disable();
6349 sctp_spin_lock(&head->lock);
6349 pp = sctp_sk(oldsk)->bind_hash; 6350 pp = sctp_sk(oldsk)->bind_hash;
6350 sk_add_bind_node(newsk, &pp->owner); 6351 sk_add_bind_node(newsk, &pp->owner);
6351 sctp_sk(newsk)->bind_hash = pp; 6352 sctp_sk(newsk)->bind_hash = pp;
6352 inet_sk(newsk)->num = inet_sk(oldsk)->num; 6353 inet_sk(newsk)->num = inet_sk(oldsk)->num;
6354 sctp_spin_unlock(&head->lock);
6355 sctp_local_bh_enable();
6353 6356
6354 /* Copy the bind_addr list from the original endpoint to the new 6357 /* Copy the bind_addr list from the original endpoint to the new
6355 * endpoint so that we can handle restarts properly 6358 * endpoint so that we can handle restarts properly
6356 */ 6359 */
6357 if (PF_INET6 == assoc->base.sk->sk_family) 6360 sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
6358 flags = SCTP_ADDR6_ALLOWED; 6361 &oldsp->ep->base.bind_addr, GFP_KERNEL);
6359 if (assoc->peer.ipv4_address)
6360 flags |= SCTP_ADDR4_PEERSUPP;
6361 if (assoc->peer.ipv6_address)
6362 flags |= SCTP_ADDR6_PEERSUPP;
6363 sctp_bind_addr_copy(&newsp->ep->base.bind_addr,
6364 &oldsp->ep->base.bind_addr,
6365 SCTP_SCOPE_GLOBAL, GFP_KERNEL, flags);
6366 6362
6367 /* Move any messages in the old socket's receive queue that are for the 6363 /* Move any messages in the old socket's receive queue that are for the
6368 * peeled off association to the new socket's receive queue. 6364 * peeled off association to the new socket's receive queue.
@@ -6455,6 +6451,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
6455} 6451}
6456 6452
6457 6453
6454DEFINE_PROTO_INUSE(sctp)
6455
6458/* This proto struct describes the ULP interface for SCTP. */ 6456/* This proto struct describes the ULP interface for SCTP. */
6459struct proto sctp_prot = { 6457struct proto sctp_prot = {
6460 .name = "SCTP", 6458 .name = "SCTP",
@@ -6483,9 +6481,12 @@ struct proto sctp_prot = {
6483 .memory_pressure = &sctp_memory_pressure, 6481 .memory_pressure = &sctp_memory_pressure,
6484 .enter_memory_pressure = sctp_enter_memory_pressure, 6482 .enter_memory_pressure = sctp_enter_memory_pressure,
6485 .memory_allocated = &sctp_memory_allocated, 6483 .memory_allocated = &sctp_memory_allocated,
6484 REF_PROTO_INUSE(sctp)
6486}; 6485};
6487 6486
6488#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 6487#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
6488DEFINE_PROTO_INUSE(sctpv6)
6489
6489struct proto sctpv6_prot = { 6490struct proto sctpv6_prot = {
6490 .name = "SCTPv6", 6491 .name = "SCTPv6",
6491 .owner = THIS_MODULE, 6492 .owner = THIS_MODULE,
@@ -6513,5 +6514,6 @@ struct proto sctpv6_prot = {
6513 .memory_pressure = &sctp_memory_pressure, 6514 .memory_pressure = &sctp_memory_pressure,
6514 .enter_memory_pressure = sctp_enter_memory_pressure, 6515 .enter_memory_pressure = sctp_enter_memory_pressure,
6515 .memory_allocated = &sctp_memory_allocated, 6516 .memory_allocated = &sctp_memory_allocated,
6517 REF_PROTO_INUSE(sctpv6)
6516}; 6518};
6517#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 6519#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0669778e4335..5eb6ea829b54 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -263,27 +263,22 @@ static ctl_table sctp_table[] = {
263 .proc_handler = &proc_dointvec, 263 .proc_handler = &proc_dointvec,
264 .strategy = &sysctl_intvec 264 .strategy = &sysctl_intvec
265 }, 265 },
266 { .ctl_name = 0 }
267};
268
269static ctl_table sctp_net_table[] = {
270 { 266 {
271 .ctl_name = NET_SCTP, 267 .ctl_name = CTL_UNNUMBERED,
272 .procname = "sctp", 268 .procname = "addip_noauth_enable",
273 .mode = 0555, 269 .data = &sctp_addip_noauth,
274 .child = sctp_table 270 .maxlen = sizeof(int),
271 .mode = 0644,
272 .proc_handler = &proc_dointvec,
273 .strategy = &sysctl_intvec
275 }, 274 },
276 { .ctl_name = 0 } 275 { .ctl_name = 0 }
277}; 276};
278 277
279static ctl_table sctp_root_table[] = { 278static struct ctl_path sctp_path[] = {
280 { 279 { .procname = "net", .ctl_name = CTL_NET, },
281 .ctl_name = CTL_NET, 280 { .procname = "sctp", .ctl_name = NET_SCTP, },
282 .procname = "net", 281 { }
283 .mode = 0555,
284 .child = sctp_net_table
285 },
286 { .ctl_name = 0 }
287}; 282};
288 283
289static struct ctl_table_header * sctp_sysctl_header; 284static struct ctl_table_header * sctp_sysctl_header;
@@ -291,7 +286,7 @@ static struct ctl_table_header * sctp_sysctl_header;
291/* Sysctl registration. */ 286/* Sysctl registration. */
292void sctp_sysctl_register(void) 287void sctp_sysctl_register(void)
293{ 288{
294 sctp_sysctl_header = register_sysctl_table(sctp_root_table); 289 sctp_sysctl_header = register_sysctl_paths(sctp_path, sctp_table);
295} 290}
296 291
297/* Sysctl deregistration. */ 292/* Sysctl deregistration. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 5f467c914f80..dfa109341aeb 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -74,8 +74,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
74 * given destination transport address, set RTO to the protocol 74 * given destination transport address, set RTO to the protocol
75 * parameter 'RTO.Initial'. 75 * parameter 'RTO.Initial'.
76 */ 76 */
77 peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial);
77 peer->rtt = 0; 78 peer->rtt = 0;
78 peer->rto = msecs_to_jiffies(sctp_rto_initial);
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
@@ -99,15 +99,10 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
99 INIT_LIST_HEAD(&peer->send_ready); 99 INIT_LIST_HEAD(&peer->send_ready);
100 INIT_LIST_HEAD(&peer->transports); 100 INIT_LIST_HEAD(&peer->transports);
101 101
102 /* Set up the retransmission timer. */ 102 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
103 init_timer(&peer->T3_rtx_timer); 103 (unsigned long)peer);
104 peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event; 104 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
105 peer->T3_rtx_timer.data = (unsigned long)peer; 105 (unsigned long)peer);
106
107 /* Set up the heartbeat timer. */
108 init_timer(&peer->hb_timer);
109 peer->hb_timer.function = sctp_generate_heartbeat_event;
110 peer->hb_timer.data = (unsigned long)peer;
111 106
112 /* Initialize the 64-bit random nonce sent with heartbeat. */ 107 /* Initialize the 64-bit random nonce sent with heartbeat. */
113 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); 108 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
@@ -385,6 +380,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
385 tp->rto = tp->asoc->rto_max; 380 tp->rto = tp->asoc->rto_max;
386 381
387 tp->rtt = rtt; 382 tp->rtt = rtt;
383 tp->last_rto = tp->rto;
388 384
389 /* Reset rto_pending so that a new RTT measurement is started when a 385 /* Reset rto_pending so that a new RTT measurement is started when a
390 * new data chunk is sent. 386 * new data chunk is sent.
@@ -578,7 +574,7 @@ void sctp_transport_reset(struct sctp_transport *t)
578 */ 574 */
579 t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); 575 t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
580 t->ssthresh = asoc->peer.i.a_rwnd; 576 t->ssthresh = asoc->peer.i.a_rwnd;
581 t->rto = asoc->rto_initial; 577 t->last_rto = t->rto = asoc->rto_initial;
582 t->rtt = 0; 578 t->rtt = 0;
583 t->srtt = 0; 579 t->srtt = 0;
584 t->rttvar = 0; 580 t->rttvar = 0;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2c17c7efad46..047c27df98f4 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -700,7 +700,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
700 if (rx_count >= asoc->base.sk->sk_rcvbuf) { 700 if (rx_count >= asoc->base.sk->sk_rcvbuf) {
701 701
702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || 702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
703 (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) 703 (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
704 goto fail; 704 goto fail;
705 } 705 }
706 706
@@ -830,7 +830,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey(
830 ak = (struct sctp_authkey_event *) 830 ak = (struct sctp_authkey_event *)
831 skb_put(skb, sizeof(struct sctp_authkey_event)); 831 skb_put(skb, sizeof(struct sctp_authkey_event));
832 832
833 ak->auth_type = SCTP_AUTHENTICATION_EVENT; 833 ak->auth_type = SCTP_AUTHENTICATION_INDICATION;
834 ak->auth_flags = 0; 834 ak->auth_flags = 0;
835 ak->auth_length = sizeof(struct sctp_authkey_event); 835 ak->auth_length = sizeof(struct sctp_authkey_event);
836 836
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 4be92d0a2cab..c25caefa3bcb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -53,6 +53,7 @@ static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
53 struct sctp_ulpevent *); 53 struct sctp_ulpevent *);
54static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, 54static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *,
55 struct sctp_ulpevent *); 55 struct sctp_ulpevent *);
56static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq);
56 57
57/* 1st Level Abstractions */ 58/* 1st Level Abstractions */
58 59
@@ -190,6 +191,7 @@ static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
190static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) 191static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
191{ 192{
192 ulpq->pd_mode = 0; 193 ulpq->pd_mode = 0;
194 sctp_ulpq_reasm_drain(ulpq);
193 return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc); 195 return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
194} 196}
195 197
@@ -699,6 +701,37 @@ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn)
699 } 701 }
700} 702}
701 703
704/*
705 * Drain the reassembly queue. If we just cleared parted delivery, it
706 * is possible that the reassembly queue will contain already reassembled
707 * messages. Retrieve any such messages and give them to the user.
708 */
709static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
710{
711 struct sctp_ulpevent *event = NULL;
712 struct sk_buff_head temp;
713
714 if (skb_queue_empty(&ulpq->reasm))
715 return;
716
717 while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) {
718 /* Do ordering if needed. */
719 if ((event) && (event->msg_flags & MSG_EOR)){
720 skb_queue_head_init(&temp);
721 __skb_queue_tail(&temp, sctp_event2skb(event));
722
723 event = sctp_ulpq_order(ulpq, event);
724 }
725
726 /* Send event to the ULP. 'event' is the
727 * sctp_ulpevent for very first SKB on the temp' list.
728 */
729 if (event)
730 sctp_ulpq_tail_event(ulpq, event);
731 }
732}
733
734
702/* Helper function to gather skbs that have possibly become 735/* Helper function to gather skbs that have possibly become
703 * ordered by an an incoming chunk. 736 * ordered by an an incoming chunk.
704 */ 737 */
@@ -862,7 +895,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
862 continue; 895 continue;
863 896
864 /* see if this ssn has been marked by skipping */ 897 /* see if this ssn has been marked by skipping */
865 if (!SSN_lt(cssn, sctp_ssn_peek(in, csid))) 898 if (!SSN_lte(cssn, sctp_ssn_peek(in, csid)))
866 break; 899 break;
867 900
868 __skb_unlink(pos, &ulpq->lobby); 901 __skb_unlink(pos, &ulpq->lobby);
@@ -1013,7 +1046,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1013 sctp_ulpq_partial_delivery(ulpq, chunk, gfp); 1046 sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
1014 } 1047 }
1015 1048
1016 sk_stream_mem_reclaim(asoc->base.sk); 1049 sk_mem_reclaim(asoc->base.sk);
1017 return; 1050 return;
1018} 1051}
1019 1052
diff --git a/net/socket.c b/net/socket.c
index 540013ea8620..7651de008502 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -112,6 +112,9 @@ static long compat_sock_ioctl(struct file *file,
112static int sock_fasync(int fd, struct file *filp, int on); 112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_sendpage(struct file *file, struct page *page, 113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more); 114 int offset, size_t size, loff_t *ppos, int more);
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
115 118
116/* 119/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
@@ -134,6 +137,7 @@ static const struct file_operations socket_file_ops = {
134 .fasync = sock_fasync, 137 .fasync = sock_fasync,
135 .sendpage = sock_sendpage, 138 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage, 139 .splice_write = generic_splice_sendpage,
140 .splice_read = sock_splice_read,
137}; 141};
138 142
139/* 143/*
@@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
691 return sock->ops->sendpage(sock, page, offset, size, flags); 695 return sock->ops->sendpage(sock, page, offset, size, flags);
692} 696}
693 697
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
704 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
705}
706
694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 707static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
695 struct sock_iocb *siocb) 708 struct sock_iocb *siocb)
696{ 709{
@@ -1057,20 +1070,19 @@ int sock_wake_async(struct socket *sock, int how, int band)
1057 if (!sock || !sock->fasync_list) 1070 if (!sock || !sock->fasync_list)
1058 return -1; 1071 return -1;
1059 switch (how) { 1072 switch (how) {
1060 case 1: 1073 case SOCK_WAKE_WAITD:
1061
1062 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1074 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1063 break; 1075 break;
1064 goto call_kill; 1076 goto call_kill;
1065 case 2: 1077 case SOCK_WAKE_SPACE:
1066 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1078 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1067 break; 1079 break;
1068 /* fall through */ 1080 /* fall through */
1069 case 0: 1081 case SOCK_WAKE_IO:
1070call_kill: 1082call_kill:
1071 __kill_fasync(sock->fasync_list, SIGIO, band); 1083 __kill_fasync(sock->fasync_list, SIGIO, band);
1072 break; 1084 break;
1073 case 3: 1085 case SOCK_WAKE_URG:
1074 __kill_fasync(sock->fasync_list, SIGURG, band); 1086 __kill_fasync(sock->fasync_list, SIGURG, band);
1075 } 1087 }
1076 return 0; 1088 return 0;
@@ -1250,11 +1262,14 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
1250 goto out_release_both; 1262 goto out_release_both;
1251 1263
1252 fd1 = sock_alloc_fd(&newfile1); 1264 fd1 = sock_alloc_fd(&newfile1);
1253 if (unlikely(fd1 < 0)) 1265 if (unlikely(fd1 < 0)) {
1266 err = fd1;
1254 goto out_release_both; 1267 goto out_release_both;
1268 }
1255 1269
1256 fd2 = sock_alloc_fd(&newfile2); 1270 fd2 = sock_alloc_fd(&newfile2);
1257 if (unlikely(fd2 < 0)) { 1271 if (unlikely(fd2 < 0)) {
1272 err = fd2;
1258 put_filp(newfile1); 1273 put_filp(newfile1);
1259 put_unused_fd(fd1); 1274 put_unused_fd(fd1);
1260 goto out_release_both; 1275 goto out_release_both;
@@ -1350,17 +1365,17 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1350 * ready for listening. 1365 * ready for listening.
1351 */ 1366 */
1352 1367
1353int sysctl_somaxconn __read_mostly = SOMAXCONN;
1354
1355asmlinkage long sys_listen(int fd, int backlog) 1368asmlinkage long sys_listen(int fd, int backlog)
1356{ 1369{
1357 struct socket *sock; 1370 struct socket *sock;
1358 int err, fput_needed; 1371 int err, fput_needed;
1372 int somaxconn;
1359 1373
1360 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1374 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1361 if (sock) { 1375 if (sock) {
1362 if ((unsigned)backlog > sysctl_somaxconn) 1376 somaxconn = sock->sk->sk_net->sysctl_somaxconn;
1363 backlog = sysctl_somaxconn; 1377 if ((unsigned)backlog > somaxconn)
1378 backlog = somaxconn;
1364 1379
1365 err = security_socket_listen(sock, backlog); 1380 err = security_socket_listen(sock, backlog);
1366 if (!err) 1381 if (!err)
@@ -1578,16 +1593,11 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1578 struct msghdr msg; 1593 struct msghdr msg;
1579 struct iovec iov; 1594 struct iovec iov;
1580 int fput_needed; 1595 int fput_needed;
1581 struct file *sock_file;
1582 1596
1583 sock_file = fget_light(fd, &fput_needed); 1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1584 err = -EBADF; 1598 if (!sock)
1585 if (!sock_file)
1586 goto out; 1599 goto out;
1587 1600
1588 sock = sock_from_file(sock_file, &err);
1589 if (!sock)
1590 goto out_put;
1591 iov.iov_base = buff; 1601 iov.iov_base = buff;
1592 iov.iov_len = len; 1602 iov.iov_len = len;
1593 msg.msg_name = NULL; 1603 msg.msg_name = NULL;
@@ -1609,7 +1619,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1609 err = sock_sendmsg(sock, &msg, len); 1619 err = sock_sendmsg(sock, &msg, len);
1610 1620
1611out_put: 1621out_put:
1612 fput_light(sock_file, fput_needed); 1622 fput_light(sock->file, fput_needed);
1613out: 1623out:
1614 return err; 1624 return err;
1615} 1625}
@@ -1638,17 +1648,11 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1638 struct msghdr msg; 1648 struct msghdr msg;
1639 char address[MAX_SOCK_ADDR]; 1649 char address[MAX_SOCK_ADDR];
1640 int err, err2; 1650 int err, err2;
1641 struct file *sock_file;
1642 int fput_needed; 1651 int fput_needed;
1643 1652
1644 sock_file = fget_light(fd, &fput_needed); 1653 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1645 err = -EBADF;
1646 if (!sock_file)
1647 goto out;
1648
1649 sock = sock_from_file(sock_file, &err);
1650 if (!sock) 1654 if (!sock)
1651 goto out_put; 1655 goto out;
1652 1656
1653 msg.msg_control = NULL; 1657 msg.msg_control = NULL;
1654 msg.msg_controllen = 0; 1658 msg.msg_controllen = 0;
@@ -1667,8 +1671,8 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1667 if (err2 < 0) 1671 if (err2 < 0)
1668 err = err2; 1672 err = err2;
1669 } 1673 }
1670out_put: 1674
1671 fput_light(sock_file, fput_needed); 1675 fput_light(sock->file, fput_needed);
1672out: 1676out:
1673 return err; 1677 return err;
1674} 1678}
@@ -2316,6 +2320,11 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2316 return err; 2320 return err;
2317} 2321}
2318 2322
2323int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2324{
2325 return sock->ops->shutdown(sock, how);
2326}
2327
2319/* ABI emulation layers need these two */ 2328/* ABI emulation layers need these two */
2320EXPORT_SYMBOL(move_addr_to_kernel); 2329EXPORT_SYMBOL(move_addr_to_kernel);
2321EXPORT_SYMBOL(move_addr_to_user); 2330EXPORT_SYMBOL(move_addr_to_user);
@@ -2342,3 +2351,4 @@ EXPORT_SYMBOL(kernel_getsockopt);
2342EXPORT_SYMBOL(kernel_setsockopt); 2351EXPORT_SYMBOL(kernel_setsockopt);
2343EXPORT_SYMBOL(kernel_sendpage); 2352EXPORT_SYMBOL(kernel_sendpage);
2344EXPORT_SYMBOL(kernel_sock_ioctl); 2353EXPORT_SYMBOL(kernel_sock_ioctl);
2354EXPORT_SYMBOL(kernel_sock_shutdown);
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 5c69a725e530..92e1dbe50947 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
11 auth.o auth_null.o auth_unix.o \ 11 auth.o auth_null.o auth_unix.o \
12 svc.o svcsock.o svcauth.o svcauth_unix.o \ 12 svc.o svcsock.o svcauth.o svcauth_unix.o \
13 rpcb_clnt.o timer.o xdr.o \ 13 rpcb_clnt.o timer.o xdr.o \
14 sunrpc_syms.o cache.o rpc_pipe.o 14 sunrpc_syms.o cache.o rpc_pipe.o \
15 svc_xprt.o
15sunrpc-$(CONFIG_PROC_FS) += stats.o 16sunrpc-$(CONFIG_PROC_FS) += stats.o
16sunrpc-$(CONFIG_SYSCTL) += sysctl.o 17sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 1ea27559b1de..eca941ce298b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -51,6 +51,7 @@ rpcauth_register(const struct rpc_authops *ops)
51 spin_unlock(&rpc_authflavor_lock); 51 spin_unlock(&rpc_authflavor_lock);
52 return ret; 52 return ret;
53} 53}
54EXPORT_SYMBOL_GPL(rpcauth_register);
54 55
55int 56int
56rpcauth_unregister(const struct rpc_authops *ops) 57rpcauth_unregister(const struct rpc_authops *ops)
@@ -68,6 +69,7 @@ rpcauth_unregister(const struct rpc_authops *ops)
68 spin_unlock(&rpc_authflavor_lock); 69 spin_unlock(&rpc_authflavor_lock);
69 return ret; 70 return ret;
70} 71}
72EXPORT_SYMBOL_GPL(rpcauth_unregister);
71 73
72struct rpc_auth * 74struct rpc_auth *
73rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) 75rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
@@ -102,6 +104,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
102out: 104out:
103 return auth; 105 return auth;
104} 106}
107EXPORT_SYMBOL_GPL(rpcauth_create);
105 108
106void 109void
107rpcauth_release(struct rpc_auth *auth) 110rpcauth_release(struct rpc_auth *auth)
@@ -151,6 +154,7 @@ rpcauth_init_credcache(struct rpc_auth *auth)
151 auth->au_credcache = new; 154 auth->au_credcache = new;
152 return 0; 155 return 0;
153} 156}
157EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
154 158
155/* 159/*
156 * Destroy a list of credentials 160 * Destroy a list of credentials
@@ -213,6 +217,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
213 kfree(cache); 217 kfree(cache);
214 } 218 }
215} 219}
220EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
216 221
217/* 222/*
218 * Remove stale credentials. Avoid sleeping inside the loop. 223 * Remove stale credentials. Avoid sleeping inside the loop.
@@ -332,6 +337,7 @@ found:
332out: 337out:
333 return cred; 338 return cred;
334} 339}
340EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
335 341
336struct rpc_cred * 342struct rpc_cred *
337rpcauth_lookupcred(struct rpc_auth *auth, int flags) 343rpcauth_lookupcred(struct rpc_auth *auth, int flags)
@@ -350,6 +356,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
350 put_group_info(acred.group_info); 356 put_group_info(acred.group_info);
351 return ret; 357 return ret;
352} 358}
359EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
353 360
354void 361void
355rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, 362rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
@@ -366,7 +373,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
366#endif 373#endif
367 cred->cr_uid = acred->uid; 374 cred->cr_uid = acred->uid;
368} 375}
369EXPORT_SYMBOL(rpcauth_init_cred); 376EXPORT_SYMBOL_GPL(rpcauth_init_cred);
370 377
371struct rpc_cred * 378struct rpc_cred *
372rpcauth_bindcred(struct rpc_task *task) 379rpcauth_bindcred(struct rpc_task *task)
@@ -435,6 +442,7 @@ need_lock:
435out_destroy: 442out_destroy:
436 cred->cr_ops->crdestroy(cred); 443 cred->cr_ops->crdestroy(cred);
437} 444}
445EXPORT_SYMBOL_GPL(put_rpccred);
438 446
439void 447void
440rpcauth_unbindcred(struct rpc_task *task) 448rpcauth_unbindcred(struct rpc_task *task)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 53995af9ca4b..6dac38792288 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -472,16 +472,15 @@ gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
472 char __user *dst, size_t buflen) 472 char __user *dst, size_t buflen)
473{ 473{
474 char *data = (char *)msg->data + msg->copied; 474 char *data = (char *)msg->data + msg->copied;
475 ssize_t mlen = msg->len; 475 size_t mlen = min(msg->len, buflen);
476 ssize_t left; 476 unsigned long left;
477 477
478 if (mlen > buflen)
479 mlen = buflen;
480 left = copy_to_user(dst, data, mlen); 478 left = copy_to_user(dst, data, mlen);
481 if (left < 0) { 479 if (left == mlen) {
482 msg->errno = left; 480 msg->errno = -EFAULT;
483 return left; 481 return -EFAULT;
484 } 482 }
483
485 mlen -= left; 484 mlen -= left;
486 msg->copied += mlen; 485 msg->copied += mlen;
487 msg->errno = 0; 486 msg->errno = 0;
@@ -540,7 +539,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
540 p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); 539 p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
541 if (IS_ERR(p)) { 540 if (IS_ERR(p)) {
542 err = PTR_ERR(p); 541 err = PTR_ERR(p);
543 gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; 542 gss_msg->msg.errno = (err == -EAGAIN) ? -EAGAIN : -EACCES;
544 goto err_release_msg; 543 goto err_release_msg;
545 } 544 }
546 gss_msg->ctx = gss_get_ctx(ctx); 545 gss_msg->ctx = gss_get_ctx(ctx);
@@ -625,7 +624,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
625 err = -EINVAL; 624 err = -EINVAL;
626 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); 625 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
627 if (!gss_auth->mech) { 626 if (!gss_auth->mech) {
628 printk(KERN_WARNING "%s: Pseudoflavor %d not found!", 627 printk(KERN_WARNING "%s: Pseudoflavor %d not found!\n",
629 __FUNCTION__, flavor); 628 __FUNCTION__, flavor);
630 goto err_free; 629 goto err_free;
631 } 630 }
@@ -967,7 +966,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
967 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 966 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
968 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); 967 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
969 if (maj_stat) { 968 if (maj_stat) {
970 dprintk("RPC: %5u gss_validate: gss_verify_mic returned" 969 dprintk("RPC: %5u gss_validate: gss_verify_mic returned "
971 "error 0x%08x\n", task->tk_pid, maj_stat); 970 "error 0x%08x\n", task->tk_pid, maj_stat);
972 goto out_bad; 971 goto out_bad;
973 } 972 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 91cd8f0d1e10..0dd792338fa9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -211,8 +211,8 @@ encryptor(struct scatterlist *sg, void *data)
211 if (thislen == 0) 211 if (thislen == 0)
212 return 0; 212 return 0;
213 213
214 sg_mark_end(desc->infrags, desc->fragno); 214 sg_mark_end(&desc->infrags[desc->fragno - 1]);
215 sg_mark_end(desc->outfrags, desc->fragno); 215 sg_mark_end(&desc->outfrags[desc->fragno - 1]);
216 216
217 ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, 217 ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
218 desc->infrags, thislen); 218 desc->infrags, thislen);
@@ -293,7 +293,7 @@ decryptor(struct scatterlist *sg, void *data)
293 if (thislen == 0) 293 if (thislen == 0)
294 return 0; 294 return 0;
295 295
296 sg_mark_end(desc->frags, desc->fragno); 296 sg_mark_end(&desc->frags[desc->fragno - 1]);
297 297
298 ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, 298 ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
299 desc->frags, thislen); 299 desc->frags, thislen);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 9843eacef11d..60c3dba545d7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -147,13 +147,17 @@ gss_import_sec_context_kerberos(const void *p,
147 p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); 147 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
148 if (IS_ERR(p)) 148 if (IS_ERR(p))
149 goto out_err_free_ctx; 149 goto out_err_free_ctx;
150 if (tmp != SGN_ALG_DES_MAC_MD5) 150 if (tmp != SGN_ALG_DES_MAC_MD5) {
151 p = ERR_PTR(-ENOSYS);
151 goto out_err_free_ctx; 152 goto out_err_free_ctx;
153 }
152 p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); 154 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
153 if (IS_ERR(p)) 155 if (IS_ERR(p))
154 goto out_err_free_ctx; 156 goto out_err_free_ctx;
155 if (tmp != SEAL_ALG_DES) 157 if (tmp != SEAL_ALG_DES) {
158 p = ERR_PTR(-ENOSYS);
156 goto out_err_free_ctx; 159 goto out_err_free_ctx;
160 }
157 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); 161 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
158 if (IS_ERR(p)) 162 if (IS_ERR(p))
159 goto out_err_free_ctx; 163 goto out_err_free_ctx;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index a0d9faa59cb5..dedcbd6108f4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -63,7 +63,6 @@
63#include <linux/jiffies.h> 63#include <linux/jiffies.h>
64#include <linux/sunrpc/gss_krb5.h> 64#include <linux/sunrpc/gss_krb5.h>
65#include <linux/random.h> 65#include <linux/random.h>
66#include <asm/scatterlist.h>
67#include <linux/crypto.h> 66#include <linux/crypto.h>
68 67
69#ifdef RPC_DEBUG 68#ifdef RPC_DEBUG
@@ -84,6 +83,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
84 u32 seq_send; 83 u32 seq_send;
85 84
86 dprintk("RPC: gss_krb5_seal\n"); 85 dprintk("RPC: gss_krb5_seal\n");
86 BUG_ON(ctx == NULL);
87 87
88 now = get_seconds(); 88 now = get_seconds();
89 89
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 8bd074df27d3..3bdc527ee64a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -4,7 +4,6 @@
4#include <linux/sunrpc/gss_krb5.h> 4#include <linux/sunrpc/gss_krb5.h>
5#include <linux/random.h> 5#include <linux/random.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <asm/scatterlist.h>
8#include <linux/crypto.h> 7#include <linux/crypto.h>
9 8
10#ifdef RPC_DEBUG 9#ifdef RPC_DEBUG
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73940df6c460..481f984e9a22 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd,
224 224
225 /* major/minor */ 225 /* major/minor */
226 len = qword_get(&mesg, buf, mlen); 226 len = qword_get(&mesg, buf, mlen);
227 if (len < 0) 227 if (len <= 0)
228 goto out; 228 goto out;
229 if (len == 0) { 229 rsii.major_status = simple_strtoul(buf, &ep, 10);
230 if (*ep)
231 goto out;
232 len = qword_get(&mesg, buf, mlen);
233 if (len <= 0)
234 goto out;
235 rsii.minor_status = simple_strtoul(buf, &ep, 10);
236 if (*ep)
230 goto out; 237 goto out;
231 } else {
232 rsii.major_status = simple_strtoul(buf, &ep, 10);
233 if (*ep)
234 goto out;
235 len = qword_get(&mesg, buf, mlen);
236 if (len <= 0)
237 goto out;
238 rsii.minor_status = simple_strtoul(buf, &ep, 10);
239 if (*ep)
240 goto out;
241 238
242 /* out_handle */ 239 /* out_handle */
243 len = qword_get(&mesg, buf, mlen); 240 len = qword_get(&mesg, buf, mlen);
244 if (len < 0) 241 if (len < 0)
245 goto out; 242 goto out;
246 status = -ENOMEM; 243 status = -ENOMEM;
247 if (dup_to_netobj(&rsii.out_handle, buf, len)) 244 if (dup_to_netobj(&rsii.out_handle, buf, len))
248 goto out; 245 goto out;
249 246
250 /* out_token */ 247 /* out_token */
251 len = qword_get(&mesg, buf, mlen); 248 len = qword_get(&mesg, buf, mlen);
252 status = -EINVAL; 249 status = -EINVAL;
253 if (len < 0) 250 if (len < 0)
254 goto out; 251 goto out;
255 status = -ENOMEM; 252 status = -ENOMEM;
256 if (dup_to_netobj(&rsii.out_token, buf, len)) 253 if (dup_to_netobj(&rsii.out_token, buf, len))
257 goto out; 254 goto out;
258 }
259 rsii.h.expiry_time = expiry; 255 rsii.h.expiry_time = expiry;
260 rsip = rsi_update(&rsii, rsip); 256 rsip = rsi_update(&rsii, rsip);
261 status = 0; 257 status = 0;
@@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
975 struct kvec *resv = &rqstp->rq_res.head[0]; 971 struct kvec *resv = &rqstp->rq_res.head[0];
976 struct xdr_netobj tmpobj; 972 struct xdr_netobj tmpobj;
977 struct rsi *rsip, rsikey; 973 struct rsi *rsip, rsikey;
974 int ret;
978 975
979 /* Read the verifier; should be NULL: */ 976 /* Read the verifier; should be NULL: */
980 *authp = rpc_autherr_badverf; 977 *authp = rpc_autherr_badverf;
@@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1014 /* No upcall result: */ 1011 /* No upcall result: */
1015 return SVC_DROP; 1012 return SVC_DROP;
1016 case 0: 1013 case 0:
1014 ret = SVC_DROP;
1017 /* Got an answer to the upcall; use it: */ 1015 /* Got an answer to the upcall; use it: */
1018 if (gss_write_init_verf(rqstp, rsip)) 1016 if (gss_write_init_verf(rqstp, rsip))
1019 return SVC_DROP; 1017 goto out;
1020 if (resv->iov_len + 4 > PAGE_SIZE) 1018 if (resv->iov_len + 4 > PAGE_SIZE)
1021 return SVC_DROP; 1019 goto out;
1022 svc_putnl(resv, RPC_SUCCESS); 1020 svc_putnl(resv, RPC_SUCCESS);
1023 if (svc_safe_putnetobj(resv, &rsip->out_handle)) 1021 if (svc_safe_putnetobj(resv, &rsip->out_handle))
1024 return SVC_DROP; 1022 goto out;
1025 if (resv->iov_len + 3 * 4 > PAGE_SIZE) 1023 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
1026 return SVC_DROP; 1024 goto out;
1027 svc_putnl(resv, rsip->major_status); 1025 svc_putnl(resv, rsip->major_status);
1028 svc_putnl(resv, rsip->minor_status); 1026 svc_putnl(resv, rsip->minor_status);
1029 svc_putnl(resv, GSS_SEQ_WIN); 1027 svc_putnl(resv, GSS_SEQ_WIN);
1030 if (svc_safe_putnetobj(resv, &rsip->out_token)) 1028 if (svc_safe_putnetobj(resv, &rsip->out_token))
1031 return SVC_DROP; 1029 goto out;
1032 } 1030 }
1033 return SVC_COMPLETE; 1031 ret = SVC_COMPLETE;
1032out:
1033 cache_put(&rsip->h, &rsi_cache);
1034 return ret;
1034} 1035}
1035 1036
1036/* 1037/*
@@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1125 case RPC_GSS_PROC_DESTROY: 1126 case RPC_GSS_PROC_DESTROY:
1126 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) 1127 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
1127 goto auth_err; 1128 goto auth_err;
1129 rsci->h.expiry_time = get_seconds();
1128 set_bit(CACHE_NEGATIVE, &rsci->h.flags); 1130 set_bit(CACHE_NEGATIVE, &rsci->h.flags);
1129 if (resv->iov_len + 4 > PAGE_SIZE) 1131 if (resv->iov_len + 4 > PAGE_SIZE)
1130 goto drop; 1132 goto drop;
@@ -1386,19 +1388,26 @@ int
1386gss_svc_init(void) 1388gss_svc_init(void)
1387{ 1389{
1388 int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); 1390 int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
1389 if (rv == 0) { 1391 if (rv)
1390 cache_register(&rsc_cache); 1392 return rv;
1391 cache_register(&rsi_cache); 1393 rv = cache_register(&rsc_cache);
1392 } 1394 if (rv)
1395 goto out1;
1396 rv = cache_register(&rsi_cache);
1397 if (rv)
1398 goto out2;
1399 return 0;
1400out2:
1401 cache_unregister(&rsc_cache);
1402out1:
1403 svc_auth_unregister(RPC_AUTH_GSS);
1393 return rv; 1404 return rv;
1394} 1405}
1395 1406
1396void 1407void
1397gss_svc_shutdown(void) 1408gss_svc_shutdown(void)
1398{ 1409{
1399 if (cache_unregister(&rsc_cache)) 1410 cache_unregister(&rsc_cache);
1400 printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); 1411 cache_unregister(&rsi_cache);
1401 if (cache_unregister(&rsi_cache))
1402 printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n");
1403 svc_auth_unregister(RPC_AUTH_GSS); 1412 svc_auth_unregister(RPC_AUTH_GSS);
1404} 1413}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8e05557414ce..636c8e04e0be 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail,
245 cache_put(h, detail); 245 cache_put(h, detail);
246 return rv; 246 return rv;
247} 247}
248EXPORT_SYMBOL(cache_check);
248 249
249/* 250/*
250 * caches need to be periodically cleaned. 251 * caches need to be periodically cleaned.
@@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations;
290static void do_cache_clean(struct work_struct *work); 291static void do_cache_clean(struct work_struct *work);
291static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); 292static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
292 293
293void cache_register(struct cache_detail *cd) 294static void remove_cache_proc_entries(struct cache_detail *cd)
294{ 295{
295 cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); 296 if (cd->proc_ent == NULL)
296 if (cd->proc_ent) { 297 return;
297 struct proc_dir_entry *p; 298 if (cd->flush_ent)
298 cd->proc_ent->owner = cd->owner; 299 remove_proc_entry("flush", cd->proc_ent);
299 cd->channel_ent = cd->content_ent = NULL; 300 if (cd->channel_ent)
301 remove_proc_entry("channel", cd->proc_ent);
302 if (cd->content_ent)
303 remove_proc_entry("content", cd->proc_ent);
304 cd->proc_ent = NULL;
305 remove_proc_entry(cd->name, proc_net_rpc);
306}
300 307
301 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, 308#ifdef CONFIG_PROC_FS
302 cd->proc_ent); 309static int create_cache_proc_entries(struct cache_detail *cd)
303 cd->flush_ent = p; 310{
304 if (p) { 311 struct proc_dir_entry *p;
305 p->proc_fops = &cache_flush_operations;
306 p->owner = cd->owner;
307 p->data = cd;
308 }
309 312
310 if (cd->cache_request || cd->cache_parse) { 313 cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
311 p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, 314 if (cd->proc_ent == NULL)
312 cd->proc_ent); 315 goto out_nomem;
313 cd->channel_ent = p; 316 cd->proc_ent->owner = cd->owner;
314 if (p) { 317 cd->channel_ent = cd->content_ent = NULL;
315 p->proc_fops = &cache_file_operations; 318
316 p->owner = cd->owner; 319 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent);
317 p->data = cd; 320 cd->flush_ent = p;
318 } 321 if (p == NULL)
319 } 322 goto out_nomem;
320 if (cd->cache_show) { 323 p->proc_fops = &cache_flush_operations;
321 p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, 324 p->owner = cd->owner;
322 cd->proc_ent); 325 p->data = cd;
323 cd->content_ent = p; 326
324 if (p) { 327 if (cd->cache_request || cd->cache_parse) {
325 p->proc_fops = &content_file_operations; 328 p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
326 p->owner = cd->owner; 329 cd->proc_ent);
327 p->data = cd; 330 cd->channel_ent = p;
328 } 331 if (p == NULL)
329 } 332 goto out_nomem;
333 p->proc_fops = &cache_file_operations;
334 p->owner = cd->owner;
335 p->data = cd;
330 } 336 }
337 if (cd->cache_show) {
338 p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
339 cd->proc_ent);
340 cd->content_ent = p;
341 if (p == NULL)
342 goto out_nomem;
343 p->proc_fops = &content_file_operations;
344 p->owner = cd->owner;
345 p->data = cd;
346 }
347 return 0;
348out_nomem:
349 remove_cache_proc_entries(cd);
350 return -ENOMEM;
351}
352#else /* CONFIG_PROC_FS */
353static int create_cache_proc_entries(struct cache_detail *cd)
354{
355 return 0;
356}
357#endif
358
359int cache_register(struct cache_detail *cd)
360{
361 int ret;
362
363 ret = create_cache_proc_entries(cd);
364 if (ret)
365 return ret;
331 rwlock_init(&cd->hash_lock); 366 rwlock_init(&cd->hash_lock);
332 INIT_LIST_HEAD(&cd->queue); 367 INIT_LIST_HEAD(&cd->queue);
333 spin_lock(&cache_list_lock); 368 spin_lock(&cache_list_lock);
@@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd)
341 376
342 /* start the cleaning process */ 377 /* start the cleaning process */
343 schedule_delayed_work(&cache_cleaner, 0); 378 schedule_delayed_work(&cache_cleaner, 0);
379 return 0;
344} 380}
381EXPORT_SYMBOL(cache_register);
345 382
346int cache_unregister(struct cache_detail *cd) 383void cache_unregister(struct cache_detail *cd)
347{ 384{
348 cache_purge(cd); 385 cache_purge(cd);
349 spin_lock(&cache_list_lock); 386 spin_lock(&cache_list_lock);
@@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd)
351 if (cd->entries || atomic_read(&cd->inuse)) { 388 if (cd->entries || atomic_read(&cd->inuse)) {
352 write_unlock(&cd->hash_lock); 389 write_unlock(&cd->hash_lock);
353 spin_unlock(&cache_list_lock); 390 spin_unlock(&cache_list_lock);
354 return -EBUSY; 391 goto out;
355 } 392 }
356 if (current_detail == cd) 393 if (current_detail == cd)
357 current_detail = NULL; 394 current_detail = NULL;
358 list_del_init(&cd->others); 395 list_del_init(&cd->others);
359 write_unlock(&cd->hash_lock); 396 write_unlock(&cd->hash_lock);
360 spin_unlock(&cache_list_lock); 397 spin_unlock(&cache_list_lock);
361 if (cd->proc_ent) { 398 remove_cache_proc_entries(cd);
362 if (cd->flush_ent)
363 remove_proc_entry("flush", cd->proc_ent);
364 if (cd->channel_ent)
365 remove_proc_entry("channel", cd->proc_ent);
366 if (cd->content_ent)
367 remove_proc_entry("content", cd->proc_ent);
368
369 cd->proc_ent = NULL;
370 remove_proc_entry(cd->name, proc_net_rpc);
371 }
372 if (list_empty(&cache_list)) { 399 if (list_empty(&cache_list)) {
373 /* module must be being unloaded so its safe to kill the worker */ 400 /* module must be being unloaded so its safe to kill the worker */
374 cancel_delayed_work_sync(&cache_cleaner); 401 cancel_delayed_work_sync(&cache_cleaner);
375 } 402 }
376 return 0; 403 return;
404out:
405 printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
377} 406}
407EXPORT_SYMBOL(cache_unregister);
378 408
379/* clean cache tries to find something to clean 409/* clean cache tries to find something to clean
380 * and cleans it. 410 * and cleans it.
@@ -489,6 +519,7 @@ void cache_flush(void)
489 while (cache_clean() != -1) 519 while (cache_clean() != -1)
490 cond_resched(); 520 cond_resched();
491} 521}
522EXPORT_SYMBOL(cache_flush);
492 523
493void cache_purge(struct cache_detail *detail) 524void cache_purge(struct cache_detail *detail)
494{ 525{
@@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail)
497 cache_flush(); 528 cache_flush();
498 detail->flush_time = 1; 529 detail->flush_time = 1;
499} 530}
500 531EXPORT_SYMBOL(cache_purge);
501 532
502 533
503/* 534/*
@@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner)
634/* 665/*
635 * communicate with user-space 666 * communicate with user-space
636 * 667 *
637 * We have a magic /proc file - /proc/sunrpc/cache 668 * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
638 * On read, you get a full request, or block 669 * On read, you get a full request, or block.
639 * On write, an update request is processed 670 * On write, an update request is processed.
640 * Poll works if anything to read, and always allows write 671 * Poll works if anything to read, and always allows write.
641 * 672 *
642 * Implemented by linked list of requests. Each open file has 673 * Implemented by linked list of requests. Each open file has
643 * a ->private that also exists in this list. New request are added 674 * a ->private that also exists in this list. New requests are added
644 * to the end and may wakeup and preceding readers. 675 * to the end and may wakeup and preceding readers.
645 * New readers are added to the head. If, on read, an item is found with 676 * New readers are added to the head. If, on read, an item is found with
646 * CACHE_UPCALLING clear, we free it from the list. 677 * CACHE_UPCALLING clear, we free it from the list.
@@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str)
963 *bpp = bp; 994 *bpp = bp;
964 *lp = len; 995 *lp = len;
965} 996}
997EXPORT_SYMBOL(qword_add);
966 998
967void qword_addhex(char **bpp, int *lp, char *buf, int blen) 999void qword_addhex(char **bpp, int *lp, char *buf, int blen)
968{ 1000{
@@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen)
991 *bpp = bp; 1023 *bpp = bp;
992 *lp = len; 1024 *lp = len;
993} 1025}
1026EXPORT_SYMBOL(qword_addhex);
994 1027
995static void warn_no_listener(struct cache_detail *detail) 1028static void warn_no_listener(struct cache_detail *detail)
996{ 1029{
@@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
1113 *dest = '\0'; 1146 *dest = '\0';
1114 return len; 1147 return len;
1115} 1148}
1149EXPORT_SYMBOL(qword_get);
1116 1150
1117 1151
1118/* 1152/*
@@ -1127,6 +1161,7 @@ struct handle {
1127}; 1161};
1128 1162
1129static void *c_start(struct seq_file *m, loff_t *pos) 1163static void *c_start(struct seq_file *m, loff_t *pos)
1164 __acquires(cd->hash_lock)
1130{ 1165{
1131 loff_t n = *pos; 1166 loff_t n = *pos;
1132 unsigned hash, entry; 1167 unsigned hash, entry;
@@ -1183,6 +1218,7 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1183} 1218}
1184 1219
1185static void c_stop(struct seq_file *m, void *p) 1220static void c_stop(struct seq_file *m, void *p)
1221 __releases(cd->hash_lock)
1186{ 1222{
1187 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1223 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1188 read_unlock(&cd->hash_lock); 1224 read_unlock(&cd->hash_lock);
@@ -1242,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1242 struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; 1278 struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
1243 char tbuf[20]; 1279 char tbuf[20];
1244 unsigned long p = *ppos; 1280 unsigned long p = *ppos;
1245 int len; 1281 size_t len;
1246 1282
1247 sprintf(tbuf, "%lu\n", cd->flush_time); 1283 sprintf(tbuf, "%lu\n", cd->flush_time);
1248 len = strlen(tbuf); 1284 len = strlen(tbuf);
1249 if (p >= len) 1285 if (p >= len)
1250 return 0; 1286 return 0;
1251 len -= p; 1287 len -= p;
1252 if (len > count) len = count; 1288 if (len > count)
1289 len = count;
1253 if (copy_to_user(buf, (void*)(tbuf+p), len)) 1290 if (copy_to_user(buf, (void*)(tbuf+p), len))
1254 len = -EFAULT; 1291 return -EFAULT;
1255 else 1292 *ppos += len;
1256 *ppos += len;
1257 return len; 1293 return len;
1258} 1294}
1259 1295
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76be83ee4b04..0998e6d09664 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -30,6 +30,7 @@
30#include <linux/smp_lock.h> 30#include <linux/smp_lock.h>
31#include <linux/utsname.h> 31#include <linux/utsname.h>
32#include <linux/workqueue.h> 32#include <linux/workqueue.h>
33#include <linux/in6.h>
33 34
34#include <linux/sunrpc/clnt.h> 35#include <linux/sunrpc/clnt.h>
35#include <linux/sunrpc/rpc_pipe_fs.h> 36#include <linux/sunrpc/rpc_pipe_fs.h>
@@ -121,8 +122,9 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
121 } 122 }
122} 123}
123 124
124static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) 125static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
125{ 126{
127 struct rpc_program *program = args->program;
126 struct rpc_version *version; 128 struct rpc_version *version;
127 struct rpc_clnt *clnt = NULL; 129 struct rpc_clnt *clnt = NULL;
128 struct rpc_auth *auth; 130 struct rpc_auth *auth;
@@ -131,13 +133,13 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
131 133
132 /* sanity check the name before trying to print it */ 134 /* sanity check the name before trying to print it */
133 err = -EINVAL; 135 err = -EINVAL;
134 len = strlen(servname); 136 len = strlen(args->servername);
135 if (len > RPC_MAXNETNAMELEN) 137 if (len > RPC_MAXNETNAMELEN)
136 goto out_no_rpciod; 138 goto out_no_rpciod;
137 len++; 139 len++;
138 140
139 dprintk("RPC: creating %s client for %s (xprt %p)\n", 141 dprintk("RPC: creating %s client for %s (xprt %p)\n",
140 program->name, servname, xprt); 142 program->name, args->servername, xprt);
141 143
142 err = rpciod_up(); 144 err = rpciod_up();
143 if (err) 145 if (err)
@@ -145,7 +147,11 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
145 err = -EINVAL; 147 err = -EINVAL;
146 if (!xprt) 148 if (!xprt)
147 goto out_no_xprt; 149 goto out_no_xprt;
148 if (vers >= program->nrvers || !(version = program->version[vers])) 150
151 if (args->version >= program->nrvers)
152 goto out_err;
153 version = program->version[args->version];
154 if (version == NULL)
149 goto out_err; 155 goto out_err;
150 156
151 err = -ENOMEM; 157 err = -ENOMEM;
@@ -157,12 +163,12 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
157 clnt->cl_server = clnt->cl_inline_name; 163 clnt->cl_server = clnt->cl_inline_name;
158 if (len > sizeof(clnt->cl_inline_name)) { 164 if (len > sizeof(clnt->cl_inline_name)) {
159 char *buf = kmalloc(len, GFP_KERNEL); 165 char *buf = kmalloc(len, GFP_KERNEL);
160 if (buf != 0) 166 if (buf != NULL)
161 clnt->cl_server = buf; 167 clnt->cl_server = buf;
162 else 168 else
163 len = sizeof(clnt->cl_inline_name); 169 len = sizeof(clnt->cl_inline_name);
164 } 170 }
165 strlcpy(clnt->cl_server, servname, len); 171 strlcpy(clnt->cl_server, args->servername, len);
166 172
167 clnt->cl_xprt = xprt; 173 clnt->cl_xprt = xprt;
168 clnt->cl_procinfo = version->procs; 174 clnt->cl_procinfo = version->procs;
@@ -182,8 +188,15 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
182 if (!xprt_bound(clnt->cl_xprt)) 188 if (!xprt_bound(clnt->cl_xprt))
183 clnt->cl_autobind = 1; 189 clnt->cl_autobind = 1;
184 190
191 clnt->cl_timeout = xprt->timeout;
192 if (args->timeout != NULL) {
193 memcpy(&clnt->cl_timeout_default, args->timeout,
194 sizeof(clnt->cl_timeout_default));
195 clnt->cl_timeout = &clnt->cl_timeout_default;
196 }
197
185 clnt->cl_rtt = &clnt->cl_rtt_default; 198 clnt->cl_rtt = &clnt->cl_rtt_default;
186 rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); 199 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
187 200
188 kref_init(&clnt->cl_kref); 201 kref_init(&clnt->cl_kref);
189 202
@@ -191,10 +204,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
191 if (err < 0) 204 if (err < 0)
192 goto out_no_path; 205 goto out_no_path;
193 206
194 auth = rpcauth_create(flavor, clnt); 207 auth = rpcauth_create(args->authflavor, clnt);
195 if (IS_ERR(auth)) { 208 if (IS_ERR(auth)) {
196 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", 209 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
197 flavor); 210 args->authflavor);
198 err = PTR_ERR(auth); 211 err = PTR_ERR(auth);
199 goto out_no_auth; 212 goto out_no_auth;
200 } 213 }
@@ -245,9 +258,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
245 .srcaddr = args->saddress, 258 .srcaddr = args->saddress,
246 .dstaddr = args->address, 259 .dstaddr = args->address,
247 .addrlen = args->addrsize, 260 .addrlen = args->addrsize,
248 .timeout = args->timeout
249 }; 261 };
250 char servername[20]; 262 char servername[48];
251 263
252 xprt = xprt_create_transport(&xprtargs); 264 xprt = xprt_create_transport(&xprtargs);
253 if (IS_ERR(xprt)) 265 if (IS_ERR(xprt))
@@ -258,13 +270,34 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
258 * up a string representation of the passed-in address. 270 * up a string representation of the passed-in address.
259 */ 271 */
260 if (args->servername == NULL) { 272 if (args->servername == NULL) {
261 struct sockaddr_in *addr = 273 servername[0] = '\0';
262 (struct sockaddr_in *) args->address; 274 switch (args->address->sa_family) {
263 snprintf(servername, sizeof(servername), NIPQUAD_FMT, 275 case AF_INET: {
264 NIPQUAD(addr->sin_addr.s_addr)); 276 struct sockaddr_in *sin =
277 (struct sockaddr_in *)args->address;
278 snprintf(servername, sizeof(servername), NIPQUAD_FMT,
279 NIPQUAD(sin->sin_addr.s_addr));
280 break;
281 }
282 case AF_INET6: {
283 struct sockaddr_in6 *sin =
284 (struct sockaddr_in6 *)args->address;
285 snprintf(servername, sizeof(servername), NIP6_FMT,
286 NIP6(sin->sin6_addr));
287 break;
288 }
289 default:
290 /* caller wants default server name, but
291 * address family isn't recognized. */
292 return ERR_PTR(-EINVAL);
293 }
265 args->servername = servername; 294 args->servername = servername;
266 } 295 }
267 296
297 xprt = xprt_create_transport(&xprtargs);
298 if (IS_ERR(xprt))
299 return (struct rpc_clnt *)xprt;
300
268 /* 301 /*
269 * By default, kernel RPC client connects from a reserved port. 302 * By default, kernel RPC client connects from a reserved port.
270 * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, 303 * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
@@ -275,13 +308,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
275 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) 308 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
276 xprt->resvport = 0; 309 xprt->resvport = 0;
277 310
278 clnt = rpc_new_client(xprt, args->servername, args->program, 311 clnt = rpc_new_client(args, xprt);
279 args->version, args->authflavor);
280 if (IS_ERR(clnt)) 312 if (IS_ERR(clnt))
281 return clnt; 313 return clnt;
282 314
283 if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { 315 if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
284 int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); 316 int err = rpc_ping(clnt, RPC_TASK_SOFT);
285 if (err != 0) { 317 if (err != 0) {
286 rpc_shutdown_client(clnt); 318 rpc_shutdown_client(clnt);
287 return ERR_PTR(err); 319 return ERR_PTR(err);
@@ -292,8 +324,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
292 if (args->flags & RPC_CLNT_CREATE_HARDRTRY) 324 if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
293 clnt->cl_softrtry = 0; 325 clnt->cl_softrtry = 0;
294 326
295 if (args->flags & RPC_CLNT_CREATE_INTR)
296 clnt->cl_intr = 1;
297 if (args->flags & RPC_CLNT_CREATE_AUTOBIND) 327 if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
298 clnt->cl_autobind = 1; 328 clnt->cl_autobind = 1;
299 if (args->flags & RPC_CLNT_CREATE_DISCRTRY) 329 if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
@@ -322,7 +352,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
322 new->cl_autobind = 0; 352 new->cl_autobind = 0;
323 INIT_LIST_HEAD(&new->cl_tasks); 353 INIT_LIST_HEAD(&new->cl_tasks);
324 spin_lock_init(&new->cl_lock); 354 spin_lock_init(&new->cl_lock);
325 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); 355 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
326 new->cl_metrics = rpc_alloc_iostats(clnt); 356 new->cl_metrics = rpc_alloc_iostats(clnt);
327 if (new->cl_metrics == NULL) 357 if (new->cl_metrics == NULL)
328 goto out_no_stats; 358 goto out_no_stats;
@@ -345,6 +375,7 @@ out_no_clnt:
345 dprintk("RPC: %s: returned error %d\n", __FUNCTION__, err); 375 dprintk("RPC: %s: returned error %d\n", __FUNCTION__, err);
346 return ERR_PTR(err); 376 return ERR_PTR(err);
347} 377}
378EXPORT_SYMBOL_GPL(rpc_clone_client);
348 379
349/* 380/*
350 * Properly shut down an RPC client, terminating all outstanding 381 * Properly shut down an RPC client, terminating all outstanding
@@ -363,6 +394,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
363 394
364 rpc_release_client(clnt); 395 rpc_release_client(clnt);
365} 396}
397EXPORT_SYMBOL_GPL(rpc_shutdown_client);
366 398
367/* 399/*
368 * Free an RPC client 400 * Free an RPC client
@@ -459,7 +491,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
459 clnt->cl_prog = program->number; 491 clnt->cl_prog = program->number;
460 clnt->cl_vers = version->number; 492 clnt->cl_vers = version->number;
461 clnt->cl_stats = program->stats; 493 clnt->cl_stats = program->stats;
462 err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); 494 err = rpc_ping(clnt, RPC_TASK_SOFT);
463 if (err != 0) { 495 if (err != 0) {
464 rpc_shutdown_client(clnt); 496 rpc_shutdown_client(clnt);
465 clnt = ERR_PTR(err); 497 clnt = ERR_PTR(err);
@@ -467,6 +499,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
467out: 499out:
468 return clnt; 500 return clnt;
469} 501}
502EXPORT_SYMBOL_GPL(rpc_bind_new_program);
470 503
471/* 504/*
472 * Default callback for async RPC calls 505 * Default callback for async RPC calls
@@ -480,77 +513,34 @@ static const struct rpc_call_ops rpc_default_ops = {
480 .rpc_call_done = rpc_default_callback, 513 .rpc_call_done = rpc_default_callback,
481}; 514};
482 515
483/* 516/**
484 * Export the signal mask handling for synchronous code that 517 * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
485 * sleeps on RPC calls 518 * @task_setup_data: pointer to task initialisation data
486 */ 519 */
487#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM)) 520struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
488
489static void rpc_save_sigmask(sigset_t *oldset, int intr)
490{
491 unsigned long sigallow = sigmask(SIGKILL);
492 sigset_t sigmask;
493
494 /* Block all signals except those listed in sigallow */
495 if (intr)
496 sigallow |= RPC_INTR_SIGNALS;
497 siginitsetinv(&sigmask, sigallow);
498 sigprocmask(SIG_BLOCK, &sigmask, oldset);
499}
500
501static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
502{
503 rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
504}
505
506static inline void rpc_restore_sigmask(sigset_t *oldset)
507{
508 sigprocmask(SIG_SETMASK, oldset, NULL);
509}
510
511void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
512{
513 rpc_save_sigmask(oldset, clnt->cl_intr);
514}
515
516void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
517{
518 rpc_restore_sigmask(oldset);
519}
520
521static
522struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
523 struct rpc_message *msg,
524 int flags,
525 const struct rpc_call_ops *ops,
526 void *data)
527{ 521{
528 struct rpc_task *task, *ret; 522 struct rpc_task *task, *ret;
529 sigset_t oldset;
530 523
531 task = rpc_new_task(clnt, flags, ops, data); 524 task = rpc_new_task(task_setup_data);
532 if (task == NULL) { 525 if (task == NULL) {
533 rpc_release_calldata(ops, data); 526 rpc_release_calldata(task_setup_data->callback_ops,
534 return ERR_PTR(-ENOMEM); 527 task_setup_data->callback_data);
528 ret = ERR_PTR(-ENOMEM);
529 goto out;
535 } 530 }
536 531
537 /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ 532 if (task->tk_status != 0) {
538 rpc_task_sigmask(task, &oldset); 533 ret = ERR_PTR(task->tk_status);
539 if (msg != NULL) { 534 rpc_put_task(task);
540 rpc_call_setup(task, msg, 0); 535 goto out;
541 if (task->tk_status != 0) {
542 ret = ERR_PTR(task->tk_status);
543 rpc_put_task(task);
544 goto out;
545 }
546 } 536 }
547 atomic_inc(&task->tk_count); 537 atomic_inc(&task->tk_count);
548 rpc_execute(task); 538 rpc_execute(task);
549 ret = task; 539 ret = task;
550out: 540out:
551 rpc_restore_sigmask(&oldset);
552 return ret; 541 return ret;
553} 542}
543EXPORT_SYMBOL_GPL(rpc_run_task);
554 544
555/** 545/**
556 * rpc_call_sync - Perform a synchronous RPC call 546 * rpc_call_sync - Perform a synchronous RPC call
@@ -561,17 +551,24 @@ out:
561int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) 551int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
562{ 552{
563 struct rpc_task *task; 553 struct rpc_task *task;
554 struct rpc_task_setup task_setup_data = {
555 .rpc_client = clnt,
556 .rpc_message = msg,
557 .callback_ops = &rpc_default_ops,
558 .flags = flags,
559 };
564 int status; 560 int status;
565 561
566 BUG_ON(flags & RPC_TASK_ASYNC); 562 BUG_ON(flags & RPC_TASK_ASYNC);
567 563
568 task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); 564 task = rpc_run_task(&task_setup_data);
569 if (IS_ERR(task)) 565 if (IS_ERR(task))
570 return PTR_ERR(task); 566 return PTR_ERR(task);
571 status = task->tk_status; 567 status = task->tk_status;
572 rpc_put_task(task); 568 rpc_put_task(task);
573 return status; 569 return status;
574} 570}
571EXPORT_SYMBOL_GPL(rpc_call_sync);
575 572
576/** 573/**
577 * rpc_call_async - Perform an asynchronous RPC call 574 * rpc_call_async - Perform an asynchronous RPC call
@@ -586,45 +583,28 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
586 const struct rpc_call_ops *tk_ops, void *data) 583 const struct rpc_call_ops *tk_ops, void *data)
587{ 584{
588 struct rpc_task *task; 585 struct rpc_task *task;
586 struct rpc_task_setup task_setup_data = {
587 .rpc_client = clnt,
588 .rpc_message = msg,
589 .callback_ops = tk_ops,
590 .callback_data = data,
591 .flags = flags|RPC_TASK_ASYNC,
592 };
589 593
590 task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); 594 task = rpc_run_task(&task_setup_data);
591 if (IS_ERR(task)) 595 if (IS_ERR(task))
592 return PTR_ERR(task); 596 return PTR_ERR(task);
593 rpc_put_task(task); 597 rpc_put_task(task);
594 return 0; 598 return 0;
595} 599}
596 600EXPORT_SYMBOL_GPL(rpc_call_async);
597/**
598 * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
599 * @clnt: pointer to RPC client
600 * @flags: RPC flags
601 * @ops: RPC call ops
602 * @data: user call data
603 */
604struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
605 const struct rpc_call_ops *tk_ops,
606 void *data)
607{
608 return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
609}
610EXPORT_SYMBOL(rpc_run_task);
611 601
612void 602void
613rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) 603rpc_call_start(struct rpc_task *task)
614{ 604{
615 task->tk_msg = *msg; 605 task->tk_action = call_start;
616 task->tk_flags |= flags;
617 /* Bind the user cred */
618 if (task->tk_msg.rpc_cred != NULL)
619 rpcauth_holdcred(task);
620 else
621 rpcauth_bindcred(task);
622
623 if (task->tk_status == 0)
624 task->tk_action = call_start;
625 else
626 task->tk_action = rpc_exit_task;
627} 606}
607EXPORT_SYMBOL_GPL(rpc_call_start);
628 608
629/** 609/**
630 * rpc_peeraddr - extract remote peer address from clnt's xprt 610 * rpc_peeraddr - extract remote peer address from clnt's xprt
@@ -653,7 +633,8 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
653 * @format: address format 633 * @format: address format
654 * 634 *
655 */ 635 */
656char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) 636const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
637 enum rpc_display_format_t format)
657{ 638{
658 struct rpc_xprt *xprt = clnt->cl_xprt; 639 struct rpc_xprt *xprt = clnt->cl_xprt;
659 640
@@ -671,6 +652,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
671 if (xprt->ops->set_buffer_size) 652 if (xprt->ops->set_buffer_size)
672 xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); 653 xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
673} 654}
655EXPORT_SYMBOL_GPL(rpc_setbufsize);
674 656
675/* 657/*
676 * Return size of largest payload RPC client can support, in bytes 658 * Return size of largest payload RPC client can support, in bytes
@@ -710,6 +692,7 @@ rpc_restart_call(struct rpc_task *task)
710 692
711 task->tk_action = call_start; 693 task->tk_action = call_start;
712} 694}
695EXPORT_SYMBOL_GPL(rpc_restart_call);
713 696
714/* 697/*
715 * 0. Initial state 698 * 0. Initial state
@@ -1137,7 +1120,7 @@ call_status(struct rpc_task *task)
1137 case -ETIMEDOUT: 1120 case -ETIMEDOUT:
1138 task->tk_action = call_timeout; 1121 task->tk_action = call_timeout;
1139 if (task->tk_client->cl_discrtry) 1122 if (task->tk_client->cl_discrtry)
1140 xprt_disconnect(task->tk_xprt); 1123 xprt_force_disconnect(task->tk_xprt);
1141 break; 1124 break;
1142 case -ECONNREFUSED: 1125 case -ECONNREFUSED:
1143 case -ENOTCONN: 1126 case -ENOTCONN:
@@ -1260,7 +1243,7 @@ out_retry:
1260 req->rq_received = req->rq_private_buf.len = 0; 1243 req->rq_received = req->rq_private_buf.len = 0;
1261 task->tk_status = 0; 1244 task->tk_status = 0;
1262 if (task->tk_client->cl_discrtry) 1245 if (task->tk_client->cl_discrtry)
1263 xprt_disconnect(task->tk_xprt); 1246 xprt_force_disconnect(task->tk_xprt);
1264} 1247}
1265 1248
1266/* 1249/*
@@ -1517,9 +1500,15 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
1517 .rpc_proc = &rpcproc_null, 1500 .rpc_proc = &rpcproc_null,
1518 .rpc_cred = cred, 1501 .rpc_cred = cred,
1519 }; 1502 };
1520 return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); 1503 struct rpc_task_setup task_setup_data = {
1504 .rpc_client = clnt,
1505 .rpc_message = &msg,
1506 .callback_ops = &rpc_default_ops,
1507 .flags = flags,
1508 };
1509 return rpc_run_task(&task_setup_data);
1521} 1510}
1522EXPORT_SYMBOL(rpc_call_null); 1511EXPORT_SYMBOL_GPL(rpc_call_null);
1523 1512
1524#ifdef RPC_DEBUG 1513#ifdef RPC_DEBUG
1525void rpc_show_tasks(void) 1514void rpc_show_tasks(void)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 18f0a8dcc095..7e197168a245 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -76,6 +76,16 @@ rpc_timeout_upcall_queue(struct work_struct *work)
76 rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); 76 rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT);
77} 77}
78 78
79/**
80 * rpc_queue_upcall
81 * @inode: inode of upcall pipe on which to queue given message
82 * @msg: message to queue
83 *
84 * Call with an @inode created by rpc_mkpipe() to queue an upcall.
85 * A userspace process may then later read the upcall by performing a
86 * read on an open file for this inode. It is up to the caller to
87 * initialize the fields of @msg (other than @msg->list) appropriately.
88 */
79int 89int
80rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) 90rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
81{ 91{
@@ -103,6 +113,7 @@ out:
103 wake_up(&rpci->waitq); 113 wake_up(&rpci->waitq);
104 return res; 114 return res;
105} 115}
116EXPORT_SYMBOL(rpc_queue_upcall);
106 117
107static inline void 118static inline void
108rpc_inode_setowner(struct inode *inode, void *private) 119rpc_inode_setowner(struct inode *inode, void *private)
@@ -280,7 +291,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
280 mask = POLLOUT | POLLWRNORM; 291 mask = POLLOUT | POLLWRNORM;
281 if (rpci->ops == NULL) 292 if (rpci->ops == NULL)
282 mask |= POLLERR | POLLHUP; 293 mask |= POLLERR | POLLHUP;
283 if (!list_empty(&rpci->pipe)) 294 if (filp->private_data || !list_empty(&rpci->pipe))
284 mask |= POLLIN | POLLRDNORM; 295 mask |= POLLIN | POLLRDNORM;
285 return mask; 296 return mask;
286} 297}
@@ -512,8 +523,8 @@ rpc_get_inode(struct super_block *sb, int mode)
512/* 523/*
513 * FIXME: This probably has races. 524 * FIXME: This probably has races.
514 */ 525 */
515static void 526static void rpc_depopulate(struct dentry *parent,
516rpc_depopulate(struct dentry *parent, int start, int eof) 527 unsigned long start, unsigned long eof)
517{ 528{
518 struct inode *dir = parent->d_inode; 529 struct inode *dir = parent->d_inode;
519 struct list_head *pos, *next; 530 struct list_head *pos, *next;
@@ -663,7 +674,16 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
663 return dentry; 674 return dentry;
664} 675}
665 676
666 677/**
678 * rpc_mkdir - Create a new directory in rpc_pipefs
679 * @path: path from the rpc_pipefs root to the new directory
680 * @rpc_clnt: rpc client to associate with this directory
681 *
682 * This creates a directory at the given @path associated with
683 * @rpc_clnt, which will contain a file named "info" with some basic
684 * information about the client, together with any "pipes" that may
685 * later be created using rpc_mkpipe().
686 */
667struct dentry * 687struct dentry *
668rpc_mkdir(char *path, struct rpc_clnt *rpc_client) 688rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
669{ 689{
@@ -699,6 +719,10 @@ err_dput:
699 goto out; 719 goto out;
700} 720}
701 721
722/**
723 * rpc_rmdir - Remove a directory created with rpc_mkdir()
724 * @dentry: directory to remove
725 */
702int 726int
703rpc_rmdir(struct dentry *dentry) 727rpc_rmdir(struct dentry *dentry)
704{ 728{
@@ -717,6 +741,25 @@ rpc_rmdir(struct dentry *dentry)
717 return error; 741 return error;
718} 742}
719 743
744/**
745 * rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication
746 * @parent: dentry of directory to create new "pipe" in
747 * @name: name of pipe
748 * @private: private data to associate with the pipe, for the caller's use
749 * @ops: operations defining the behavior of the pipe: upcall, downcall,
750 * release_pipe, and destroy_msg.
751 *
752 * Data is made available for userspace to read by calls to
753 * rpc_queue_upcall(). The actual reads will result in calls to
754 * @ops->upcall, which will be called with the file pointer,
755 * message, and userspace buffer to copy to.
756 *
757 * Writes can come at any time, and do not necessarily have to be
758 * responses to upcalls. They will result in calls to @msg->downcall.
759 *
760 * The @private argument passed here will be available to all these methods
761 * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private.
762 */
720struct dentry * 763struct dentry *
721rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) 764rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
722{ 765{
@@ -763,7 +806,16 @@ err_dput:
763 -ENOMEM); 806 -ENOMEM);
764 goto out; 807 goto out;
765} 808}
809EXPORT_SYMBOL(rpc_mkpipe);
766 810
811/**
812 * rpc_unlink - remove a pipe
813 * @dentry: dentry for the pipe, as returned from rpc_mkpipe
814 *
815 * After this call, lookups will no longer find the pipe, and any
816 * attempts to read or write using preexisting opens of the pipe will
817 * return -EPIPE.
818 */
767int 819int
768rpc_unlink(struct dentry *dentry) 820rpc_unlink(struct dentry *dentry)
769{ 821{
@@ -785,6 +837,7 @@ rpc_unlink(struct dentry *dentry)
785 dput(parent); 837 dput(parent);
786 return error; 838 return error;
787} 839}
840EXPORT_SYMBOL(rpc_unlink);
788 841
789/* 842/*
790 * populate the filesystem 843 * populate the filesystem
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a05493aedb68..3164a0871cf0 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -55,45 +55,6 @@ enum {
55#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT 55#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
56 56
57/* 57/*
58 * r_addr
59 *
60 * Quoting RFC 3530, section 2.2:
61 *
62 * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
63 * US-ASCII string:
64 *
65 * h1.h2.h3.h4.p1.p2
66 *
67 * The prefix, "h1.h2.h3.h4", is the standard textual form for
68 * representing an IPv4 address, which is always four octets long.
69 * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
70 * the first through fourth octets each converted to ASCII-decimal.
71 * Assuming big-endian ordering, p1 and p2 are, respectively, the first
72 * and second octets each converted to ASCII-decimal. For example, if a
73 * host, in big-endian order, has an address of 0x0A010307 and there is
74 * a service listening on, in big endian order, port 0x020F (decimal
75 * 527), then the complete universal address is "10.1.3.7.2.15".
76 *
77 * ...
78 *
79 * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
80 * US-ASCII string:
81 *
82 * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
83 *
84 * The suffix "p1.p2" is the service port, and is computed the same way
85 * as with universal addresses for TCP and UDP over IPv4. The prefix,
86 * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
87 * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
88 * Additionally, the two alternative forms specified in Section 2.2 of
89 * [RFC2373] are also acceptable.
90 *
91 * XXX: Currently this implementation does not explicitly convert the
92 * stored address to US-ASCII on non-ASCII systems.
93 */
94#define RPCB_MAXADDRLEN (128u)
95
96/*
97 * r_owner 58 * r_owner
98 * 59 *
99 * The "owner" is allowed to unset a service in the rpcbind database. 60 * The "owner" is allowed to unset a service in the rpcbind database.
@@ -112,9 +73,9 @@ struct rpcbind_args {
112 u32 r_vers; 73 u32 r_vers;
113 u32 r_prot; 74 u32 r_prot;
114 unsigned short r_port; 75 unsigned short r_port;
115 char * r_netid; 76 const char * r_netid;
116 char r_addr[RPCB_MAXADDRLEN]; 77 const char * r_addr;
117 char * r_owner; 78 const char * r_owner;
118}; 79};
119 80
120static struct rpc_procinfo rpcb_procedures2[]; 81static struct rpc_procinfo rpcb_procedures2[];
@@ -128,19 +89,6 @@ struct rpcb_info {
128static struct rpcb_info rpcb_next_version[]; 89static struct rpcb_info rpcb_next_version[];
129static struct rpcb_info rpcb_next_version6[]; 90static struct rpcb_info rpcb_next_version6[];
130 91
131static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
132{
133 struct rpcbind_args *map = calldata;
134 struct rpc_xprt *xprt = map->r_xprt;
135 struct rpc_message msg = {
136 .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
137 .rpc_argp = map,
138 .rpc_resp = &map->r_port,
139 };
140
141 rpc_call_setup(task, &msg, 0);
142}
143
144static void rpcb_map_release(void *data) 92static void rpcb_map_release(void *data)
145{ 93{
146 struct rpcbind_args *map = data; 94 struct rpcbind_args *map = data;
@@ -150,7 +98,6 @@ static void rpcb_map_release(void *data)
150} 98}
151 99
152static const struct rpc_call_ops rpcb_getport_ops = { 100static const struct rpc_call_ops rpcb_getport_ops = {
153 .rpc_call_prepare = rpcb_getport_prepare,
154 .rpc_call_done = rpcb_getport_done, 101 .rpc_call_done = rpcb_getport_done,
155 .rpc_release = rpcb_map_release, 102 .rpc_release = rpcb_map_release,
156}; 103};
@@ -162,18 +109,18 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
162} 109}
163 110
164static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, 111static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
165 int proto, int version, int privileged) 112 size_t salen, int proto, u32 version,
113 int privileged)
166{ 114{
167 struct rpc_create_args args = { 115 struct rpc_create_args args = {
168 .protocol = proto, 116 .protocol = proto,
169 .address = srvaddr, 117 .address = srvaddr,
170 .addrsize = sizeof(struct sockaddr_in), 118 .addrsize = salen,
171 .servername = hostname, 119 .servername = hostname,
172 .program = &rpcb_program, 120 .program = &rpcb_program,
173 .version = version, 121 .version = version,
174 .authflavor = RPC_AUTH_UNIX, 122 .authflavor = RPC_AUTH_UNIX,
175 .flags = (RPC_CLNT_CREATE_NOPING | 123 .flags = RPC_CLNT_CREATE_NOPING,
176 RPC_CLNT_CREATE_INTR),
177 }; 124 };
178 125
179 switch (srvaddr->sa_family) { 126 switch (srvaddr->sa_family) {
@@ -230,7 +177,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
230 prog, vers, prot, port); 177 prog, vers, prot, port);
231 178
232 rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, 179 rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
233 XPRT_TRANSPORT_UDP, 2, 1); 180 sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
234 if (IS_ERR(rpcb_clnt)) 181 if (IS_ERR(rpcb_clnt))
235 return PTR_ERR(rpcb_clnt); 182 return PTR_ERR(rpcb_clnt);
236 183
@@ -252,13 +199,15 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
252 * @vers: RPC version number to bind 199 * @vers: RPC version number to bind
253 * @prot: transport protocol to use to make this request 200 * @prot: transport protocol to use to make this request
254 * 201 *
202 * Return value is the requested advertised port number,
203 * or a negative errno value.
204 *
255 * Called from outside the RPC client in a synchronous task context. 205 * Called from outside the RPC client in a synchronous task context.
256 * Uses default timeout parameters specified by underlying transport. 206 * Uses default timeout parameters specified by underlying transport.
257 * 207 *
258 * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 208 * XXX: Needs to support IPv6
259 */ 209 */
260int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, 210int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
261 __u32 vers, int prot)
262{ 211{
263 struct rpcbind_args map = { 212 struct rpcbind_args map = {
264 .r_prog = prog, 213 .r_prog = prog,
@@ -272,14 +221,13 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
272 .rpc_resp = &map.r_port, 221 .rpc_resp = &map.r_port,
273 }; 222 };
274 struct rpc_clnt *rpcb_clnt; 223 struct rpc_clnt *rpcb_clnt;
275 char hostname[40];
276 int status; 224 int status;
277 225
278 dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", 226 dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
279 __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); 227 __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
280 228
281 sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); 229 rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
282 rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); 230 sizeof(*sin), prot, 2, 0);
283 if (IS_ERR(rpcb_clnt)) 231 if (IS_ERR(rpcb_clnt))
284 return PTR_ERR(rpcb_clnt); 232 return PTR_ERR(rpcb_clnt);
285 233
@@ -295,6 +243,24 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
295} 243}
296EXPORT_SYMBOL_GPL(rpcb_getport_sync); 244EXPORT_SYMBOL_GPL(rpcb_getport_sync);
297 245
246static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
247{
248 struct rpc_message msg = {
249 .rpc_proc = rpcb_next_version[version].rpc_proc,
250 .rpc_argp = map,
251 .rpc_resp = &map->r_port,
252 };
253 struct rpc_task_setup task_setup_data = {
254 .rpc_client = rpcb_clnt,
255 .rpc_message = &msg,
256 .callback_ops = &rpcb_getport_ops,
257 .callback_data = map,
258 .flags = RPC_TASK_ASYNC,
259 };
260
261 return rpc_run_task(&task_setup_data);
262}
263
298/** 264/**
299 * rpcb_getport_async - obtain the port for a given RPC service on a given host 265 * rpcb_getport_async - obtain the port for a given RPC service on a given host
300 * @task: task that is waiting for portmapper request 266 * @task: task that is waiting for portmapper request
@@ -305,12 +271,14 @@ EXPORT_SYMBOL_GPL(rpcb_getport_sync);
305void rpcb_getport_async(struct rpc_task *task) 271void rpcb_getport_async(struct rpc_task *task)
306{ 272{
307 struct rpc_clnt *clnt = task->tk_client; 273 struct rpc_clnt *clnt = task->tk_client;
308 int bind_version; 274 u32 bind_version;
309 struct rpc_xprt *xprt = task->tk_xprt; 275 struct rpc_xprt *xprt = task->tk_xprt;
310 struct rpc_clnt *rpcb_clnt; 276 struct rpc_clnt *rpcb_clnt;
311 static struct rpcbind_args *map; 277 static struct rpcbind_args *map;
312 struct rpc_task *child; 278 struct rpc_task *child;
313 struct sockaddr addr; 279 struct sockaddr_storage addr;
280 struct sockaddr *sap = (struct sockaddr *)&addr;
281 size_t salen;
314 int status; 282 int status;
315 struct rpcb_info *info; 283 struct rpcb_info *info;
316 284
@@ -340,10 +308,10 @@ void rpcb_getport_async(struct rpc_task *task)
340 goto bailout_nofree; 308 goto bailout_nofree;
341 } 309 }
342 310
343 rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); 311 salen = rpc_peeraddr(clnt, sap, sizeof(addr));
344 312
345 /* Don't ever use rpcbind v2 for AF_INET6 requests */ 313 /* Don't ever use rpcbind v2 for AF_INET6 requests */
346 switch (addr.sa_family) { 314 switch (sap->sa_family) {
347 case AF_INET: 315 case AF_INET:
348 info = rpcb_next_version; 316 info = rpcb_next_version;
349 break; 317 break;
@@ -368,7 +336,7 @@ void rpcb_getport_async(struct rpc_task *task)
368 dprintk("RPC: %5u %s: trying rpcbind version %u\n", 336 dprintk("RPC: %5u %s: trying rpcbind version %u\n",
369 task->tk_pid, __FUNCTION__, bind_version); 337 task->tk_pid, __FUNCTION__, bind_version);
370 338
371 rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, 339 rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
372 bind_version, 0); 340 bind_version, 0);
373 if (IS_ERR(rpcb_clnt)) { 341 if (IS_ERR(rpcb_clnt)) {
374 status = PTR_ERR(rpcb_clnt); 342 status = PTR_ERR(rpcb_clnt);
@@ -390,12 +358,10 @@ void rpcb_getport_async(struct rpc_task *task)
390 map->r_port = 0; 358 map->r_port = 0;
391 map->r_xprt = xprt_get(xprt); 359 map->r_xprt = xprt_get(xprt);
392 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); 360 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
393 memcpy(map->r_addr, 361 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
394 rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
395 sizeof(map->r_addr));
396 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ 362 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
397 363
398 child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); 364 child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
399 rpc_release_client(rpcb_clnt); 365 rpc_release_client(rpcb_clnt);
400 if (IS_ERR(child)) { 366 if (IS_ERR(child)) {
401 status = -EIO; 367 status = -EIO;
@@ -518,7 +484,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
518 * Simple sanity check. The smallest possible universal 484 * Simple sanity check. The smallest possible universal
519 * address is an IPv4 address string containing 11 bytes. 485 * address is an IPv4 address string containing 11 bytes.
520 */ 486 */
521 if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) 487 if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
522 goto out_err; 488 goto out_err;
523 489
524 /* 490 /*
@@ -569,7 +535,7 @@ out_err:
569#define RPCB_boolean_sz (1u) 535#define RPCB_boolean_sz (1u)
570 536
571#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) 537#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
572#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) 538#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
573#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) 539#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
574 540
575#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ 541#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c98873f39aec..4c669121e607 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -45,7 +45,7 @@ static void rpc_release_task(struct rpc_task *task);
45/* 45/*
46 * RPC tasks sit here while waiting for conditions to improve. 46 * RPC tasks sit here while waiting for conditions to improve.
47 */ 47 */
48static RPC_WAITQ(delay_queue, "delayq"); 48static struct rpc_wait_queue delay_queue;
49 49
50/* 50/*
51 * rpciod-related stuff 51 * rpciod-related stuff
@@ -135,7 +135,7 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
135 if (unlikely(task->tk_priority > queue->maxpriority)) 135 if (unlikely(task->tk_priority > queue->maxpriority))
136 q = &queue->tasks[queue->maxpriority]; 136 q = &queue->tasks[queue->maxpriority];
137 list_for_each_entry(t, q, u.tk_wait.list) { 137 list_for_each_entry(t, q, u.tk_wait.list) {
138 if (t->tk_cookie == task->tk_cookie) { 138 if (t->tk_owner == task->tk_owner) {
139 list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); 139 list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
140 return; 140 return;
141 } 141 }
@@ -208,26 +208,26 @@ static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int
208 queue->count = 1 << (priority * 2); 208 queue->count = 1 << (priority * 2);
209} 209}
210 210
211static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie) 211static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
212{ 212{
213 queue->cookie = cookie; 213 queue->owner = pid;
214 queue->nr = RPC_BATCH_COUNT; 214 queue->nr = RPC_BATCH_COUNT;
215} 215}
216 216
217static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) 217static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
218{ 218{
219 rpc_set_waitqueue_priority(queue, queue->maxpriority); 219 rpc_set_waitqueue_priority(queue, queue->maxpriority);
220 rpc_set_waitqueue_cookie(queue, 0); 220 rpc_set_waitqueue_owner(queue, 0);
221} 221}
222 222
223static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio) 223static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
224{ 224{
225 int i; 225 int i;
226 226
227 spin_lock_init(&queue->lock); 227 spin_lock_init(&queue->lock);
228 for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) 228 for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
229 INIT_LIST_HEAD(&queue->tasks[i]); 229 INIT_LIST_HEAD(&queue->tasks[i]);
230 queue->maxpriority = maxprio; 230 queue->maxpriority = nr_queues - 1;
231 rpc_reset_waitqueue_priority(queue); 231 rpc_reset_waitqueue_priority(queue);
232#ifdef RPC_DEBUG 232#ifdef RPC_DEBUG
233 queue->name = qname; 233 queue->name = qname;
@@ -236,18 +236,18 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
236 236
237void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) 237void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
238{ 238{
239 __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH); 239 __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
240} 240}
241 241
242void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) 242void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
243{ 243{
244 __rpc_init_priority_wait_queue(queue, qname, 0); 244 __rpc_init_priority_wait_queue(queue, qname, 1);
245} 245}
246EXPORT_SYMBOL(rpc_init_wait_queue); 246EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
247 247
248static int rpc_wait_bit_interruptible(void *word) 248static int rpc_wait_bit_killable(void *word)
249{ 249{
250 if (signal_pending(current)) 250 if (fatal_signal_pending(current))
251 return -ERESTARTSYS; 251 return -ERESTARTSYS;
252 schedule(); 252 schedule();
253 return 0; 253 return 0;
@@ -299,11 +299,11 @@ static void rpc_mark_complete_task(struct rpc_task *task)
299int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 299int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
300{ 300{
301 if (action == NULL) 301 if (action == NULL)
302 action = rpc_wait_bit_interruptible; 302 action = rpc_wait_bit_killable;
303 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 303 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
304 action, TASK_INTERRUPTIBLE); 304 action, TASK_KILLABLE);
305} 305}
306EXPORT_SYMBOL(__rpc_wait_for_completion_task); 306EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
307 307
308/* 308/*
309 * Make an RPC task runnable. 309 * Make an RPC task runnable.
@@ -373,6 +373,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
373 __rpc_sleep_on(q, task, action, timer); 373 __rpc_sleep_on(q, task, action, timer);
374 spin_unlock_bh(&q->lock); 374 spin_unlock_bh(&q->lock);
375} 375}
376EXPORT_SYMBOL_GPL(rpc_sleep_on);
376 377
377/** 378/**
378 * __rpc_do_wake_up_task - wake up a single rpc_task 379 * __rpc_do_wake_up_task - wake up a single rpc_task
@@ -444,6 +445,7 @@ void rpc_wake_up_task(struct rpc_task *task)
444 } 445 }
445 rcu_read_unlock_bh(); 446 rcu_read_unlock_bh();
446} 447}
448EXPORT_SYMBOL_GPL(rpc_wake_up_task);
447 449
448/* 450/*
449 * Wake up the next task on a priority queue. 451 * Wake up the next task on a priority queue.
@@ -454,12 +456,12 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
454 struct rpc_task *task; 456 struct rpc_task *task;
455 457
456 /* 458 /*
457 * Service a batch of tasks from a single cookie. 459 * Service a batch of tasks from a single owner.
458 */ 460 */
459 q = &queue->tasks[queue->priority]; 461 q = &queue->tasks[queue->priority];
460 if (!list_empty(q)) { 462 if (!list_empty(q)) {
461 task = list_entry(q->next, struct rpc_task, u.tk_wait.list); 463 task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
462 if (queue->cookie == task->tk_cookie) { 464 if (queue->owner == task->tk_owner) {
463 if (--queue->nr) 465 if (--queue->nr)
464 goto out; 466 goto out;
465 list_move_tail(&task->u.tk_wait.list, q); 467 list_move_tail(&task->u.tk_wait.list, q);
@@ -468,7 +470,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
468 * Check if we need to switch queues. 470 * Check if we need to switch queues.
469 */ 471 */
470 if (--queue->count) 472 if (--queue->count)
471 goto new_cookie; 473 goto new_owner;
472 } 474 }
473 475
474 /* 476 /*
@@ -490,8 +492,8 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
490 492
491new_queue: 493new_queue:
492 rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); 494 rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
493new_cookie: 495new_owner:
494 rpc_set_waitqueue_cookie(queue, task->tk_cookie); 496 rpc_set_waitqueue_owner(queue, task->tk_owner);
495out: 497out:
496 __rpc_wake_up_task(task); 498 __rpc_wake_up_task(task);
497 return task; 499 return task;
@@ -519,6 +521,7 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
519 521
520 return task; 522 return task;
521} 523}
524EXPORT_SYMBOL_GPL(rpc_wake_up_next);
522 525
523/** 526/**
524 * rpc_wake_up - wake up all rpc_tasks 527 * rpc_wake_up - wake up all rpc_tasks
@@ -544,6 +547,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
544 spin_unlock(&queue->lock); 547 spin_unlock(&queue->lock);
545 rcu_read_unlock_bh(); 548 rcu_read_unlock_bh();
546} 549}
550EXPORT_SYMBOL_GPL(rpc_wake_up);
547 551
548/** 552/**
549 * rpc_wake_up_status - wake up all rpc_tasks and set their status value. 553 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
@@ -572,6 +576,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
572 spin_unlock(&queue->lock); 576 spin_unlock(&queue->lock);
573 rcu_read_unlock_bh(); 577 rcu_read_unlock_bh();
574} 578}
579EXPORT_SYMBOL_GPL(rpc_wake_up_status);
575 580
576static void __rpc_atrun(struct rpc_task *task) 581static void __rpc_atrun(struct rpc_task *task)
577{ 582{
@@ -586,6 +591,7 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
586 task->tk_timeout = delay; 591 task->tk_timeout = delay;
587 rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); 592 rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
588} 593}
594EXPORT_SYMBOL_GPL(rpc_delay);
589 595
590/* 596/*
591 * Helper to call task->tk_ops->rpc_call_prepare 597 * Helper to call task->tk_ops->rpc_call_prepare
@@ -614,7 +620,7 @@ void rpc_exit_task(struct rpc_task *task)
614 } 620 }
615 } 621 }
616} 622}
617EXPORT_SYMBOL(rpc_exit_task); 623EXPORT_SYMBOL_GPL(rpc_exit_task);
618 624
619void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) 625void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
620{ 626{
@@ -690,10 +696,9 @@ static void __rpc_execute(struct rpc_task *task)
690 696
691 /* sync task: sleep here */ 697 /* sync task: sleep here */
692 dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); 698 dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
693 /* Note: Caller should be using rpc_clnt_sigmask() */
694 status = out_of_line_wait_on_bit(&task->tk_runstate, 699 status = out_of_line_wait_on_bit(&task->tk_runstate,
695 RPC_TASK_QUEUED, rpc_wait_bit_interruptible, 700 RPC_TASK_QUEUED, rpc_wait_bit_killable,
696 TASK_INTERRUPTIBLE); 701 TASK_KILLABLE);
697 if (status == -ERESTARTSYS) { 702 if (status == -ERESTARTSYS) {
698 /* 703 /*
699 * When a sync task receives a signal, it exits with 704 * When a sync task receives a signal, it exits with
@@ -808,40 +813,47 @@ EXPORT_SYMBOL_GPL(rpc_free);
808/* 813/*
809 * Creation and deletion of RPC task structures 814 * Creation and deletion of RPC task structures
810 */ 815 */
811void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) 816static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
812{ 817{
813 memset(task, 0, sizeof(*task)); 818 memset(task, 0, sizeof(*task));
814 init_timer(&task->tk_timer); 819 setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
815 task->tk_timer.data = (unsigned long) task; 820 (unsigned long)task);
816 task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
817 atomic_set(&task->tk_count, 1); 821 atomic_set(&task->tk_count, 1);
818 task->tk_client = clnt; 822 task->tk_flags = task_setup_data->flags;
819 task->tk_flags = flags; 823 task->tk_ops = task_setup_data->callback_ops;
820 task->tk_ops = tk_ops; 824 task->tk_calldata = task_setup_data->callback_data;
821 if (tk_ops->rpc_call_prepare != NULL)
822 task->tk_action = rpc_prepare_task;
823 task->tk_calldata = calldata;
824 INIT_LIST_HEAD(&task->tk_task); 825 INIT_LIST_HEAD(&task->tk_task);
825 826
826 /* Initialize retry counters */ 827 /* Initialize retry counters */
827 task->tk_garb_retry = 2; 828 task->tk_garb_retry = 2;
828 task->tk_cred_retry = 2; 829 task->tk_cred_retry = 2;
829 830
830 task->tk_priority = RPC_PRIORITY_NORMAL; 831 task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
831 task->tk_cookie = (unsigned long)current; 832 task->tk_owner = current->tgid;
832 833
833 /* Initialize workqueue for async tasks */ 834 /* Initialize workqueue for async tasks */
834 task->tk_workqueue = rpciod_workqueue; 835 task->tk_workqueue = rpciod_workqueue;
835 836
836 if (clnt) { 837 task->tk_client = task_setup_data->rpc_client;
837 kref_get(&clnt->cl_kref); 838 if (task->tk_client != NULL) {
838 if (clnt->cl_softrtry) 839 kref_get(&task->tk_client->cl_kref);
840 if (task->tk_client->cl_softrtry)
839 task->tk_flags |= RPC_TASK_SOFT; 841 task->tk_flags |= RPC_TASK_SOFT;
840 if (!clnt->cl_intr)
841 task->tk_flags |= RPC_TASK_NOINTR;
842 } 842 }
843 843
844 BUG_ON(task->tk_ops == NULL); 844 if (task->tk_ops->rpc_call_prepare != NULL)
845 task->tk_action = rpc_prepare_task;
846
847 if (task_setup_data->rpc_message != NULL) {
848 memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
849 /* Bind the user cred */
850 if (task->tk_msg.rpc_cred != NULL)
851 rpcauth_holdcred(task);
852 else
853 rpcauth_bindcred(task);
854 if (task->tk_action == NULL)
855 rpc_call_start(task);
856 }
845 857
846 /* starting timestamp */ 858 /* starting timestamp */
847 task->tk_start = jiffies; 859 task->tk_start = jiffies;
@@ -866,18 +878,22 @@ static void rpc_free_task(struct rcu_head *rcu)
866/* 878/*
867 * Create a new task for the specified client. 879 * Create a new task for the specified client.
868 */ 880 */
869struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) 881struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
870{ 882{
871 struct rpc_task *task; 883 struct rpc_task *task = setup_data->task;
872 884 unsigned short flags = 0;
873 task = rpc_alloc_task(); 885
874 if (!task) 886 if (task == NULL) {
875 goto out; 887 task = rpc_alloc_task();
888 if (task == NULL)
889 goto out;
890 flags = RPC_TASK_DYNAMIC;
891 }
876 892
877 rpc_init_task(task, clnt, flags, tk_ops, calldata); 893 rpc_init_task(task, setup_data);
878 894
895 task->tk_flags |= flags;
879 dprintk("RPC: allocated task %p\n", task); 896 dprintk("RPC: allocated task %p\n", task);
880 task->tk_flags |= RPC_TASK_DYNAMIC;
881out: 897out:
882 return task; 898 return task;
883} 899}
@@ -903,7 +919,7 @@ void rpc_put_task(struct rpc_task *task)
903 call_rcu_bh(&task->u.tk_rcu, rpc_free_task); 919 call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
904 rpc_release_calldata(tk_ops, calldata); 920 rpc_release_calldata(tk_ops, calldata);
905} 921}
906EXPORT_SYMBOL(rpc_put_task); 922EXPORT_SYMBOL_GPL(rpc_put_task);
907 923
908static void rpc_release_task(struct rpc_task *task) 924static void rpc_release_task(struct rpc_task *task)
909{ 925{
@@ -960,6 +976,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
960 } 976 }
961 spin_unlock(&clnt->cl_lock); 977 spin_unlock(&clnt->cl_lock);
962} 978}
979EXPORT_SYMBOL_GPL(rpc_killall_tasks);
963 980
964int rpciod_up(void) 981int rpciod_up(void)
965{ 982{
@@ -1039,6 +1056,11 @@ rpc_init_mempool(void)
1039 goto err_nomem; 1056 goto err_nomem;
1040 if (!rpciod_start()) 1057 if (!rpciod_start())
1041 goto err_nomem; 1058 goto err_nomem;
1059 /*
1060 * The following is not strictly a mempool initialisation,
1061 * but there is no harm in doing it here
1062 */
1063 rpc_init_wait_queue(&delay_queue, "delayq");
1042 return 0; 1064 return 0;
1043err_nomem: 1065err_nomem:
1044 rpc_destroy_mempool(); 1066 rpc_destroy_mempool();
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 97ac45f034d6..a661a3acb37e 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -72,7 +72,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
72 struct page **ppage = xdr->pages; 72 struct page **ppage = xdr->pages;
73 unsigned int len, pglen = xdr->page_len; 73 unsigned int len, pglen = xdr->page_len;
74 ssize_t copied = 0; 74 ssize_t copied = 0;
75 int ret; 75 size_t ret;
76 76
77 len = xdr->head[0].iov_len; 77 len = xdr->head[0].iov_len;
78 if (base < len) { 78 if (base < len) {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4d4f3738b688..5a16875f5ac8 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL;
33static int rpc_proc_show(struct seq_file *seq, void *v) { 33static int rpc_proc_show(struct seq_file *seq, void *v) {
34 const struct rpc_stat *statp = seq->private; 34 const struct rpc_stat *statp = seq->private;
35 const struct rpc_program *prog = statp->program; 35 const struct rpc_program *prog = statp->program;
36 int i, j; 36 unsigned int i, j;
37 37
38 seq_printf(seq, 38 seq_printf(seq,
39 "net %u %u %u %u\n", 39 "net %u %u %u %u\n",
@@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
81 const struct svc_program *prog = statp->program; 81 const struct svc_program *prog = statp->program;
82 const struct svc_procedure *proc; 82 const struct svc_procedure *proc;
83 const struct svc_version *vers; 83 const struct svc_version *vers;
84 int i, j; 84 unsigned int i, j;
85 85
86 seq_printf(seq, 86 seq_printf(seq,
87 "net %u %u %u %u\n", 87 "net %u %u %u %u\n",
@@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
106 seq_putc(seq, '\n'); 106 seq_putc(seq, '\n');
107 } 107 }
108} 108}
109EXPORT_SYMBOL(svc_seq_show);
109 110
110/** 111/**
111 * rpc_alloc_iostats - allocate an rpc_iostats structure 112 * rpc_alloc_iostats - allocate an rpc_iostats structure
@@ -118,7 +119,7 @@ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
118 new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); 119 new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
119 return new; 120 return new;
120} 121}
121EXPORT_SYMBOL(rpc_alloc_iostats); 122EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
122 123
123/** 124/**
124 * rpc_free_iostats - release an rpc_iostats structure 125 * rpc_free_iostats - release an rpc_iostats structure
@@ -129,7 +130,7 @@ void rpc_free_iostats(struct rpc_iostats *stats)
129{ 130{
130 kfree(stats); 131 kfree(stats);
131} 132}
132EXPORT_SYMBOL(rpc_free_iostats); 133EXPORT_SYMBOL_GPL(rpc_free_iostats);
133 134
134/** 135/**
135 * rpc_count_iostats - tally up per-task stats 136 * rpc_count_iostats - tally up per-task stats
@@ -215,7 +216,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
215 metrics->om_execute * MILLISECS_PER_JIFFY); 216 metrics->om_execute * MILLISECS_PER_JIFFY);
216 } 217 }
217} 218}
218EXPORT_SYMBOL(rpc_print_iostats); 219EXPORT_SYMBOL_GPL(rpc_print_iostats);
219 220
220/* 221/*
221 * Register/unregister RPC proc files 222 * Register/unregister RPC proc files
@@ -241,24 +242,28 @@ rpc_proc_register(struct rpc_stat *statp)
241{ 242{
242 return do_register(statp->program->name, statp, &rpc_proc_fops); 243 return do_register(statp->program->name, statp, &rpc_proc_fops);
243} 244}
245EXPORT_SYMBOL_GPL(rpc_proc_register);
244 246
245void 247void
246rpc_proc_unregister(const char *name) 248rpc_proc_unregister(const char *name)
247{ 249{
248 remove_proc_entry(name, proc_net_rpc); 250 remove_proc_entry(name, proc_net_rpc);
249} 251}
252EXPORT_SYMBOL_GPL(rpc_proc_unregister);
250 253
251struct proc_dir_entry * 254struct proc_dir_entry *
252svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) 255svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
253{ 256{
254 return do_register(statp->program->pg_name, statp, fops); 257 return do_register(statp->program->pg_name, statp, fops);
255} 258}
259EXPORT_SYMBOL(svc_proc_register);
256 260
257void 261void
258svc_proc_unregister(const char *name) 262svc_proc_unregister(const char *name)
259{ 263{
260 remove_proc_entry(name, proc_net_rpc); 264 remove_proc_entry(name, proc_net_rpc);
261} 265}
266EXPORT_SYMBOL(svc_proc_unregister);
262 267
263void 268void
264rpc_proc_init(void) 269rpc_proc_init(void)
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 33d89e842c85..843629f55763 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,114 +22,6 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25/* RPC scheduler */
26EXPORT_SYMBOL(rpc_execute);
27EXPORT_SYMBOL(rpc_init_task);
28EXPORT_SYMBOL(rpc_sleep_on);
29EXPORT_SYMBOL(rpc_wake_up_next);
30EXPORT_SYMBOL(rpc_wake_up_task);
31EXPORT_SYMBOL(rpc_wake_up_status);
32
33/* RPC client functions */
34EXPORT_SYMBOL(rpc_clone_client);
35EXPORT_SYMBOL(rpc_bind_new_program);
36EXPORT_SYMBOL(rpc_shutdown_client);
37EXPORT_SYMBOL(rpc_killall_tasks);
38EXPORT_SYMBOL(rpc_call_sync);
39EXPORT_SYMBOL(rpc_call_async);
40EXPORT_SYMBOL(rpc_call_setup);
41EXPORT_SYMBOL(rpc_clnt_sigmask);
42EXPORT_SYMBOL(rpc_clnt_sigunmask);
43EXPORT_SYMBOL(rpc_delay);
44EXPORT_SYMBOL(rpc_restart_call);
45EXPORT_SYMBOL(rpc_setbufsize);
46EXPORT_SYMBOL(rpc_unlink);
47EXPORT_SYMBOL(rpc_wake_up);
48EXPORT_SYMBOL(rpc_queue_upcall);
49EXPORT_SYMBOL(rpc_mkpipe);
50
51/* Client transport */
52EXPORT_SYMBOL(xprt_set_timeout);
53
54/* Client credential cache */
55EXPORT_SYMBOL(rpcauth_register);
56EXPORT_SYMBOL(rpcauth_unregister);
57EXPORT_SYMBOL(rpcauth_create);
58EXPORT_SYMBOL(rpcauth_lookupcred);
59EXPORT_SYMBOL(rpcauth_lookup_credcache);
60EXPORT_SYMBOL(rpcauth_destroy_credcache);
61EXPORT_SYMBOL(rpcauth_init_credcache);
62EXPORT_SYMBOL(put_rpccred);
63
64/* RPC server stuff */
65EXPORT_SYMBOL(svc_create);
66EXPORT_SYMBOL(svc_create_thread);
67EXPORT_SYMBOL(svc_create_pooled);
68EXPORT_SYMBOL(svc_set_num_threads);
69EXPORT_SYMBOL(svc_exit_thread);
70EXPORT_SYMBOL(svc_destroy);
71EXPORT_SYMBOL(svc_drop);
72EXPORT_SYMBOL(svc_process);
73EXPORT_SYMBOL(svc_recv);
74EXPORT_SYMBOL(svc_wake_up);
75EXPORT_SYMBOL(svc_makesock);
76EXPORT_SYMBOL(svc_reserve);
77EXPORT_SYMBOL(svc_auth_register);
78EXPORT_SYMBOL(auth_domain_lookup);
79EXPORT_SYMBOL(svc_authenticate);
80EXPORT_SYMBOL(svc_set_client);
81
82/* RPC statistics */
83#ifdef CONFIG_PROC_FS
84EXPORT_SYMBOL(rpc_proc_register);
85EXPORT_SYMBOL(rpc_proc_unregister);
86EXPORT_SYMBOL(svc_proc_register);
87EXPORT_SYMBOL(svc_proc_unregister);
88EXPORT_SYMBOL(svc_seq_show);
89#endif
90
91/* caching... */
92EXPORT_SYMBOL(auth_domain_find);
93EXPORT_SYMBOL(auth_domain_put);
94EXPORT_SYMBOL(auth_unix_add_addr);
95EXPORT_SYMBOL(auth_unix_forget_old);
96EXPORT_SYMBOL(auth_unix_lookup);
97EXPORT_SYMBOL(cache_check);
98EXPORT_SYMBOL(cache_flush);
99EXPORT_SYMBOL(cache_purge);
100EXPORT_SYMBOL(cache_register);
101EXPORT_SYMBOL(cache_unregister);
102EXPORT_SYMBOL(qword_add);
103EXPORT_SYMBOL(qword_addhex);
104EXPORT_SYMBOL(qword_get);
105EXPORT_SYMBOL(svcauth_unix_purge);
106EXPORT_SYMBOL(unix_domain_find);
107
108/* Generic XDR */
109EXPORT_SYMBOL(xdr_encode_string);
110EXPORT_SYMBOL(xdr_decode_string_inplace);
111EXPORT_SYMBOL(xdr_decode_netobj);
112EXPORT_SYMBOL(xdr_encode_netobj);
113EXPORT_SYMBOL(xdr_encode_pages);
114EXPORT_SYMBOL(xdr_inline_pages);
115EXPORT_SYMBOL(xdr_shift_buf);
116EXPORT_SYMBOL(xdr_encode_word);
117EXPORT_SYMBOL(xdr_decode_word);
118EXPORT_SYMBOL(xdr_encode_array2);
119EXPORT_SYMBOL(xdr_decode_array2);
120EXPORT_SYMBOL(xdr_buf_from_iov);
121EXPORT_SYMBOL(xdr_buf_subsegment);
122EXPORT_SYMBOL(xdr_buf_read_netobj);
123EXPORT_SYMBOL(read_bytes_from_xdr_buf);
124
125/* Debugging symbols */
126#ifdef RPC_DEBUG
127EXPORT_SYMBOL(rpc_debug);
128EXPORT_SYMBOL(nfs_debug);
129EXPORT_SYMBOL(nfsd_debug);
130EXPORT_SYMBOL(nlm_debug);
131#endif
132
133extern struct cache_detail ip_map_cache, unix_gid_cache; 25extern struct cache_detail ip_map_cache, unix_gid_cache;
134 26
135static int __init 27static int __init
@@ -151,7 +43,8 @@ init_sunrpc(void)
151#endif 43#endif
152 cache_register(&ip_map_cache); 44 cache_register(&ip_map_cache);
153 cache_register(&unix_gid_cache); 45 cache_register(&unix_gid_cache);
154 init_socket_xprt(); 46 svc_init_xprt_sock(); /* svc sock transport */
47 init_socket_xprt(); /* clnt sock transport */
155 rpcauth_init_module(); 48 rpcauth_init_module();
156out: 49out:
157 return err; 50 return err;
@@ -162,12 +55,11 @@ cleanup_sunrpc(void)
162{ 55{
163 rpcauth_remove_module(); 56 rpcauth_remove_module();
164 cleanup_socket_xprt(); 57 cleanup_socket_xprt();
58 svc_cleanup_xprt_sock();
165 unregister_rpc_pipefs(); 59 unregister_rpc_pipefs();
166 rpc_destroy_mempool(); 60 rpc_destroy_mempool();
167 if (cache_unregister(&ip_map_cache)) 61 cache_unregister(&ip_map_cache);
168 printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); 62 cache_unregister(&unix_gid_cache);
169 if (cache_unregister(&unix_gid_cache))
170 printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n");
171#ifdef RPC_DEBUG 63#ifdef RPC_DEBUG
172 rpc_unregister_sysctl(); 64 rpc_unregister_sysctl();
173#endif 65#endif
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a4a6bf7deaa4..a290e1523297 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/sched.h>
21 22
22#include <linux/sunrpc/types.h> 23#include <linux/sunrpc/types.h>
23#include <linux/sunrpc/xdr.h> 24#include <linux/sunrpc/xdr.h>
@@ -363,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
363 void (*shutdown)(struct svc_serv *serv)) 364 void (*shutdown)(struct svc_serv *serv))
364{ 365{
365 struct svc_serv *serv; 366 struct svc_serv *serv;
366 int vers; 367 unsigned int vers;
367 unsigned int xdrsize; 368 unsigned int xdrsize;
368 unsigned int i; 369 unsigned int i;
369 370
@@ -432,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
432{ 433{
433 return __svc_create(prog, bufsize, /*npools*/1, shutdown); 434 return __svc_create(prog, bufsize, /*npools*/1, shutdown);
434} 435}
436EXPORT_SYMBOL(svc_create);
435 437
436struct svc_serv * 438struct svc_serv *
437svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 439svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
@@ -451,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
451 453
452 return serv; 454 return serv;
453} 455}
456EXPORT_SYMBOL(svc_create_pooled);
454 457
455/* 458/*
456 * Destroy an RPC service. Should be called with the BKL held 459 * Destroy an RPC service. Should be called with the BKL held
@@ -458,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
458void 461void
459svc_destroy(struct svc_serv *serv) 462svc_destroy(struct svc_serv *serv)
460{ 463{
461 struct svc_sock *svsk;
462 struct svc_sock *tmp;
463
464 dprintk("svc: svc_destroy(%s, %d)\n", 464 dprintk("svc: svc_destroy(%s, %d)\n",
465 serv->sv_program->pg_name, 465 serv->sv_program->pg_name,
466 serv->sv_nrthreads); 466 serv->sv_nrthreads);
@@ -475,14 +475,12 @@ svc_destroy(struct svc_serv *serv)
475 475
476 del_timer_sync(&serv->sv_temptimer); 476 del_timer_sync(&serv->sv_temptimer);
477 477
478 list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list) 478 svc_close_all(&serv->sv_tempsocks);
479 svc_force_close_socket(svsk);
480 479
481 if (serv->sv_shutdown) 480 if (serv->sv_shutdown)
482 serv->sv_shutdown(serv); 481 serv->sv_shutdown(serv);
483 482
484 list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list) 483 svc_close_all(&serv->sv_permsocks);
485 svc_force_close_socket(svsk);
486 484
487 BUG_ON(!list_empty(&serv->sv_permsocks)); 485 BUG_ON(!list_empty(&serv->sv_permsocks));
488 BUG_ON(!list_empty(&serv->sv_tempsocks)); 486 BUG_ON(!list_empty(&serv->sv_tempsocks));
@@ -497,6 +495,7 @@ svc_destroy(struct svc_serv *serv)
497 kfree(serv->sv_pools); 495 kfree(serv->sv_pools);
498 kfree(serv); 496 kfree(serv);
499} 497}
498EXPORT_SYMBOL(svc_destroy);
500 499
501/* 500/*
502 * Allocate an RPC server's buffer space. 501 * Allocate an RPC server's buffer space.
@@ -535,31 +534,17 @@ svc_release_buffer(struct svc_rqst *rqstp)
535 put_page(rqstp->rq_pages[i]); 534 put_page(rqstp->rq_pages[i]);
536} 535}
537 536
538/* 537struct svc_rqst *
539 * Create a thread in the given pool. Caller must hold BKL. 538svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
540 * On a NUMA or SMP machine, with a multi-pool serv, the thread
541 * will be restricted to run on the cpus belonging to the pool.
542 */
543static int
544__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
545 struct svc_pool *pool)
546{ 539{
547 struct svc_rqst *rqstp; 540 struct svc_rqst *rqstp;
548 int error = -ENOMEM;
549 int have_oldmask = 0;
550 cpumask_t oldmask;
551 541
552 rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); 542 rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
553 if (!rqstp) 543 if (!rqstp)
554 goto out; 544 goto out_enomem;
555 545
556 init_waitqueue_head(&rqstp->rq_wait); 546 init_waitqueue_head(&rqstp->rq_wait);
557 547
558 if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
559 || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
560 || !svc_init_buffer(rqstp, serv->sv_max_mesg))
561 goto out_thread;
562
563 serv->sv_nrthreads++; 548 serv->sv_nrthreads++;
564 spin_lock_bh(&pool->sp_lock); 549 spin_lock_bh(&pool->sp_lock);
565 pool->sp_nrthreads++; 550 pool->sp_nrthreads++;
@@ -568,6 +553,45 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
568 rqstp->rq_server = serv; 553 rqstp->rq_server = serv;
569 rqstp->rq_pool = pool; 554 rqstp->rq_pool = pool;
570 555
556 rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
557 if (!rqstp->rq_argp)
558 goto out_thread;
559
560 rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
561 if (!rqstp->rq_resp)
562 goto out_thread;
563
564 if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
565 goto out_thread;
566
567 return rqstp;
568out_thread:
569 svc_exit_thread(rqstp);
570out_enomem:
571 return ERR_PTR(-ENOMEM);
572}
573EXPORT_SYMBOL(svc_prepare_thread);
574
575/*
576 * Create a thread in the given pool. Caller must hold BKL.
577 * On a NUMA or SMP machine, with a multi-pool serv, the thread
578 * will be restricted to run on the cpus belonging to the pool.
579 */
580static int
581__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
582 struct svc_pool *pool)
583{
584 struct svc_rqst *rqstp;
585 int error = -ENOMEM;
586 int have_oldmask = 0;
587 cpumask_t oldmask;
588
589 rqstp = svc_prepare_thread(serv, pool);
590 if (IS_ERR(rqstp)) {
591 error = PTR_ERR(rqstp);
592 goto out;
593 }
594
571 if (serv->sv_nrpools > 1) 595 if (serv->sv_nrpools > 1)
572 have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); 596 have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
573 597
@@ -596,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
596{ 620{
597 return __svc_create_thread(func, serv, &serv->sv_pools[0]); 621 return __svc_create_thread(func, serv, &serv->sv_pools[0]);
598} 622}
623EXPORT_SYMBOL(svc_create_thread);
599 624
600/* 625/*
601 * Choose a pool in which to create a new thread, for svc_set_num_threads 626 * Choose a pool in which to create a new thread, for svc_set_num_threads
@@ -699,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
699 724
700 return error; 725 return error;
701} 726}
727EXPORT_SYMBOL(svc_set_num_threads);
702 728
703/* 729/*
704 * Called from a server thread as it's exiting. Caller must hold BKL. 730 * Called from a server thread as it's exiting. Caller must hold BKL.
@@ -725,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
725 if (serv) 751 if (serv)
726 svc_destroy(serv); 752 svc_destroy(serv);
727} 753}
754EXPORT_SYMBOL(svc_exit_thread);
728 755
729/* 756/*
730 * Register an RPC service with the local portmapper. 757 * Register an RPC service with the local portmapper.
@@ -736,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
736{ 763{
737 struct svc_program *progp; 764 struct svc_program *progp;
738 unsigned long flags; 765 unsigned long flags;
739 int i, error = 0, dummy; 766 unsigned int i;
767 int error = 0, dummy;
740 768
741 if (!port) 769 if (!port)
742 clear_thread_flag(TIF_SIGPENDING); 770 clear_thread_flag(TIF_SIGPENDING);
@@ -839,9 +867,9 @@ svc_process(struct svc_rqst *rqstp)
839 rqstp->rq_res.tail[0].iov_len = 0; 867 rqstp->rq_res.tail[0].iov_len = 0;
840 /* Will be turned off only in gss privacy case: */ 868 /* Will be turned off only in gss privacy case: */
841 rqstp->rq_splice_ok = 1; 869 rqstp->rq_splice_ok = 1;
842 /* tcp needs a space for the record length... */ 870
843 if (rqstp->rq_prot == IPPROTO_TCP) 871 /* Setup reply header */
844 svc_putnl(resv, 0); 872 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
845 873
846 rqstp->rq_xid = svc_getu32(argv); 874 rqstp->rq_xid = svc_getu32(argv);
847 svc_putu32(resv, rqstp->rq_xid); 875 svc_putu32(resv, rqstp->rq_xid);
@@ -1048,16 +1076,15 @@ err_bad:
1048 svc_putnl(resv, ntohl(rpc_stat)); 1076 svc_putnl(resv, ntohl(rpc_stat));
1049 goto sendit; 1077 goto sendit;
1050} 1078}
1079EXPORT_SYMBOL(svc_process);
1051 1080
1052/* 1081/*
1053 * Return (transport-specific) limit on the rpc payload. 1082 * Return (transport-specific) limit on the rpc payload.
1054 */ 1083 */
1055u32 svc_max_payload(const struct svc_rqst *rqstp) 1084u32 svc_max_payload(const struct svc_rqst *rqstp)
1056{ 1085{
1057 int max = RPCSVC_MAXPAYLOAD_TCP; 1086 u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
1058 1087
1059 if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
1060 max = RPCSVC_MAXPAYLOAD_UDP;
1061 if (rqstp->rq_server->sv_max_payload < max) 1088 if (rqstp->rq_server->sv_max_payload < max)
1062 max = rqstp->rq_server->sv_max_payload; 1089 max = rqstp->rq_server->sv_max_payload;
1063 return max; 1090 return max;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
new file mode 100644
index 000000000000..ea377e06afae
--- /dev/null
+++ b/net/sunrpc/svc_xprt.c
@@ -0,0 +1,1055 @@
1/*
2 * linux/net/sunrpc/svc_xprt.c
3 *
4 * Author: Tom Tucker <tom@opengridcomputing.com>
5 */
6
7#include <linux/sched.h>
8#include <linux/errno.h>
9#include <linux/fcntl.h>
10#include <linux/net.h>
11#include <linux/in.h>
12#include <linux/inet.h>
13#include <linux/udp.h>
14#include <linux/tcp.h>
15#include <linux/unistd.h>
16#include <linux/slab.h>
17#include <linux/netdevice.h>
18#include <linux/skbuff.h>
19#include <linux/file.h>
20#include <linux/freezer.h>
21#include <net/sock.h>
22#include <net/checksum.h>
23#include <net/ip.h>
24#include <net/ipv6.h>
25#include <net/tcp_states.h>
26#include <linux/uaccess.h>
27#include <asm/ioctls.h>
28
29#include <linux/sunrpc/types.h>
30#include <linux/sunrpc/clnt.h>
31#include <linux/sunrpc/xdr.h>
32#include <linux/sunrpc/stats.h>
33#include <linux/sunrpc/svc_xprt.h>
34
35#define RPCDBG_FACILITY RPCDBG_SVCXPRT
36
37static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
38static int svc_deferred_recv(struct svc_rqst *rqstp);
39static struct cache_deferred_req *svc_defer(struct cache_req *req);
40static void svc_age_temp_xprts(unsigned long closure);
41
42/* apparently the "standard" is that clients close
43 * idle connections after 5 minutes, servers after
44 * 6 minutes
45 * http://www.connectathon.org/talks96/nfstcp.pdf
46 */
47static int svc_conn_age_period = 6*60;
48
49/* List of registered transport classes */
50static DEFINE_SPINLOCK(svc_xprt_class_lock);
51static LIST_HEAD(svc_xprt_class_list);
52
53/* SMP locking strategy:
54 *
55 * svc_pool->sp_lock protects most of the fields of that pool.
56 * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
57 * when both need to be taken (rare), svc_serv->sv_lock is first.
58 * BKL protects svc_serv->sv_nrthread.
59 * svc_sock->sk_lock protects the svc_sock->sk_deferred list
60 * and the ->sk_info_authunix cache.
61 *
62 * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being
63 * enqueued multiply. During normal transport processing this bit
64 * is set by svc_xprt_enqueue and cleared by svc_xprt_received.
65 * Providers should not manipulate this bit directly.
66 *
67 * Some flags can be set to certain values at any time
68 * providing that certain rules are followed:
69 *
70 * XPT_CONN, XPT_DATA:
71 * - Can be set or cleared at any time.
72 * - After a set, svc_xprt_enqueue must be called to enqueue
73 * the transport for processing.
74 * - After a clear, the transport must be read/accepted.
75 * If this succeeds, it must be set again.
76 * XPT_CLOSE:
77 * - Can set at any time. It is never cleared.
78 * XPT_DEAD:
79 * - Can only be set while XPT_BUSY is held which ensures
80 * that no other thread will be using the transport or will
81 * try to set XPT_DEAD.
82 */
83
84int svc_reg_xprt_class(struct svc_xprt_class *xcl)
85{
86 struct svc_xprt_class *cl;
87 int res = -EEXIST;
88
89 dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
90
91 INIT_LIST_HEAD(&xcl->xcl_list);
92 spin_lock(&svc_xprt_class_lock);
93 /* Make sure there isn't already a class with the same name */
94 list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) {
95 if (strcmp(xcl->xcl_name, cl->xcl_name) == 0)
96 goto out;
97 }
98 list_add_tail(&xcl->xcl_list, &svc_xprt_class_list);
99 res = 0;
100out:
101 spin_unlock(&svc_xprt_class_lock);
102 return res;
103}
104EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
105
106void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
107{
108 dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
109 spin_lock(&svc_xprt_class_lock);
110 list_del_init(&xcl->xcl_list);
111 spin_unlock(&svc_xprt_class_lock);
112}
113EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
114
115/*
116 * Format the transport list for printing
117 */
118int svc_print_xprts(char *buf, int maxlen)
119{
120 struct list_head *le;
121 char tmpstr[80];
122 int len = 0;
123 buf[0] = '\0';
124
125 spin_lock(&svc_xprt_class_lock);
126 list_for_each(le, &svc_xprt_class_list) {
127 int slen;
128 struct svc_xprt_class *xcl =
129 list_entry(le, struct svc_xprt_class, xcl_list);
130
131 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
132 slen = strlen(tmpstr);
133 if (len + slen > maxlen)
134 break;
135 len += slen;
136 strcat(buf, tmpstr);
137 }
138 spin_unlock(&svc_xprt_class_lock);
139
140 return len;
141}
142
143static void svc_xprt_free(struct kref *kref)
144{
145 struct svc_xprt *xprt =
146 container_of(kref, struct svc_xprt, xpt_ref);
147 struct module *owner = xprt->xpt_class->xcl_owner;
148 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)
149 && xprt->xpt_auth_cache != NULL)
150 svcauth_unix_info_release(xprt->xpt_auth_cache);
151 xprt->xpt_ops->xpo_free(xprt);
152 module_put(owner);
153}
154
155void svc_xprt_put(struct svc_xprt *xprt)
156{
157 kref_put(&xprt->xpt_ref, svc_xprt_free);
158}
159EXPORT_SYMBOL_GPL(svc_xprt_put);
160
161/*
162 * Called by transport drivers to initialize the transport independent
163 * portion of the transport instance.
164 */
165void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
166 struct svc_serv *serv)
167{
168 memset(xprt, 0, sizeof(*xprt));
169 xprt->xpt_class = xcl;
170 xprt->xpt_ops = xcl->xcl_ops;
171 kref_init(&xprt->xpt_ref);
172 xprt->xpt_server = serv;
173 INIT_LIST_HEAD(&xprt->xpt_list);
174 INIT_LIST_HEAD(&xprt->xpt_ready);
175 INIT_LIST_HEAD(&xprt->xpt_deferred);
176 mutex_init(&xprt->xpt_mutex);
177 spin_lock_init(&xprt->xpt_lock);
178 set_bit(XPT_BUSY, &xprt->xpt_flags);
179}
180EXPORT_SYMBOL_GPL(svc_xprt_init);
181
182int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
183 int flags)
184{
185 struct svc_xprt_class *xcl;
186 struct sockaddr_in sin = {
187 .sin_family = AF_INET,
188 .sin_addr.s_addr = INADDR_ANY,
189 .sin_port = htons(port),
190 };
191 dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
192 spin_lock(&svc_xprt_class_lock);
193 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
194 struct svc_xprt *newxprt;
195
196 if (strcmp(xprt_name, xcl->xcl_name))
197 continue;
198
199 if (!try_module_get(xcl->xcl_owner))
200 goto err;
201
202 spin_unlock(&svc_xprt_class_lock);
203 newxprt = xcl->xcl_ops->
204 xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
205 flags);
206 if (IS_ERR(newxprt)) {
207 module_put(xcl->xcl_owner);
208 return PTR_ERR(newxprt);
209 }
210
211 clear_bit(XPT_TEMP, &newxprt->xpt_flags);
212 spin_lock_bh(&serv->sv_lock);
213 list_add(&newxprt->xpt_list, &serv->sv_permsocks);
214 spin_unlock_bh(&serv->sv_lock);
215 clear_bit(XPT_BUSY, &newxprt->xpt_flags);
216 return svc_xprt_local_port(newxprt);
217 }
218 err:
219 spin_unlock(&svc_xprt_class_lock);
220 dprintk("svc: transport %s not found\n", xprt_name);
221 return -ENOENT;
222}
223EXPORT_SYMBOL_GPL(svc_create_xprt);
224
225/*
226 * Copy the local and remote xprt addresses to the rqstp structure
227 */
228void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
229{
230 struct sockaddr *sin;
231
232 memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
233 rqstp->rq_addrlen = xprt->xpt_remotelen;
234
235 /*
236 * Destination address in request is needed for binding the
237 * source address in RPC replies/callbacks later.
238 */
239 sin = (struct sockaddr *)&xprt->xpt_local;
240 switch (sin->sa_family) {
241 case AF_INET:
242 rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
243 break;
244 case AF_INET6:
245 rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
246 break;
247 }
248}
249EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
250
251/**
252 * svc_print_addr - Format rq_addr field for printing
253 * @rqstp: svc_rqst struct containing address to print
254 * @buf: target buffer for formatted address
255 * @len: length of target buffer
256 *
257 */
258char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
259{
260 return __svc_print_addr(svc_addr(rqstp), buf, len);
261}
262EXPORT_SYMBOL_GPL(svc_print_addr);
263
264/*
265 * Queue up an idle server thread. Must have pool->sp_lock held.
266 * Note: this is really a stack rather than a queue, so that we only
267 * use as many different threads as we need, and the rest don't pollute
268 * the cache.
269 */
270static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
271{
272 list_add(&rqstp->rq_list, &pool->sp_threads);
273}
274
275/*
276 * Dequeue an nfsd thread. Must have pool->sp_lock held.
277 */
278static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
279{
280 list_del(&rqstp->rq_list);
281}
282
283/*
284 * Queue up a transport with data pending. If there are idle nfsd
285 * processes, wake 'em up.
286 *
287 */
288void svc_xprt_enqueue(struct svc_xprt *xprt)
289{
290 struct svc_serv *serv = xprt->xpt_server;
291 struct svc_pool *pool;
292 struct svc_rqst *rqstp;
293 int cpu;
294
295 if (!(xprt->xpt_flags &
296 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
297 return;
298 if (test_bit(XPT_DEAD, &xprt->xpt_flags))
299 return;
300
301 cpu = get_cpu();
302 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
303 put_cpu();
304
305 spin_lock_bh(&pool->sp_lock);
306
307 if (!list_empty(&pool->sp_threads) &&
308 !list_empty(&pool->sp_sockets))
309 printk(KERN_ERR
310 "svc_xprt_enqueue: "
311 "threads and transports both waiting??\n");
312
313 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
314 /* Don't enqueue dead transports */
315 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
316 goto out_unlock;
317 }
318
319 /* Mark transport as busy. It will remain in this state until
320 * the provider calls svc_xprt_received. We update XPT_BUSY
321 * atomically because it also guards against trying to enqueue
322 * the transport twice.
323 */
324 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
325 /* Don't enqueue transport while already enqueued */
326 dprintk("svc: transport %p busy, not enqueued\n", xprt);
327 goto out_unlock;
328 }
329 BUG_ON(xprt->xpt_pool != NULL);
330 xprt->xpt_pool = pool;
331
332 /* Handle pending connection */
333 if (test_bit(XPT_CONN, &xprt->xpt_flags))
334 goto process;
335
336 /* Handle close in-progress */
337 if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
338 goto process;
339
340 /* Check if we have space to reply to a request */
341 if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
342 /* Don't enqueue while not enough space for reply */
343 dprintk("svc: no write space, transport %p not enqueued\n",
344 xprt);
345 xprt->xpt_pool = NULL;
346 clear_bit(XPT_BUSY, &xprt->xpt_flags);
347 goto out_unlock;
348 }
349
350 process:
351 if (!list_empty(&pool->sp_threads)) {
352 rqstp = list_entry(pool->sp_threads.next,
353 struct svc_rqst,
354 rq_list);
355 dprintk("svc: transport %p served by daemon %p\n",
356 xprt, rqstp);
357 svc_thread_dequeue(pool, rqstp);
358 if (rqstp->rq_xprt)
359 printk(KERN_ERR
360 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
361 rqstp, rqstp->rq_xprt);
362 rqstp->rq_xprt = xprt;
363 svc_xprt_get(xprt);
364 rqstp->rq_reserved = serv->sv_max_mesg;
365 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
366 BUG_ON(xprt->xpt_pool != pool);
367 wake_up(&rqstp->rq_wait);
368 } else {
369 dprintk("svc: transport %p put into queue\n", xprt);
370 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
371 BUG_ON(xprt->xpt_pool != pool);
372 }
373
374out_unlock:
375 spin_unlock_bh(&pool->sp_lock);
376}
377EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
378
379/*
380 * Dequeue the first transport. Must be called with the pool->sp_lock held.
381 */
382static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
383{
384 struct svc_xprt *xprt;
385
386 if (list_empty(&pool->sp_sockets))
387 return NULL;
388
389 xprt = list_entry(pool->sp_sockets.next,
390 struct svc_xprt, xpt_ready);
391 list_del_init(&xprt->xpt_ready);
392
393 dprintk("svc: transport %p dequeued, inuse=%d\n",
394 xprt, atomic_read(&xprt->xpt_ref.refcount));
395
396 return xprt;
397}
398
399/*
400 * svc_xprt_received conditionally queues the transport for processing
401 * by another thread. The caller must hold the XPT_BUSY bit and must
402 * not thereafter touch transport data.
403 *
404 * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
405 * insufficient) data.
406 */
407void svc_xprt_received(struct svc_xprt *xprt)
408{
409 BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
410 xprt->xpt_pool = NULL;
411 clear_bit(XPT_BUSY, &xprt->xpt_flags);
412 svc_xprt_enqueue(xprt);
413}
414EXPORT_SYMBOL_GPL(svc_xprt_received);
415
416/**
417 * svc_reserve - change the space reserved for the reply to a request.
418 * @rqstp: The request in question
419 * @space: new max space to reserve
420 *
421 * Each request reserves some space on the output queue of the transport
422 * to make sure the reply fits. This function reduces that reserved
423 * space to be the amount of space used already, plus @space.
424 *
425 */
426void svc_reserve(struct svc_rqst *rqstp, int space)
427{
428 space += rqstp->rq_res.head[0].iov_len;
429
430 if (space < rqstp->rq_reserved) {
431 struct svc_xprt *xprt = rqstp->rq_xprt;
432 atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
433 rqstp->rq_reserved = space;
434
435 svc_xprt_enqueue(xprt);
436 }
437}
438EXPORT_SYMBOL(svc_reserve);
439
440static void svc_xprt_release(struct svc_rqst *rqstp)
441{
442 struct svc_xprt *xprt = rqstp->rq_xprt;
443
444 rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
445
446 svc_free_res_pages(rqstp);
447 rqstp->rq_res.page_len = 0;
448 rqstp->rq_res.page_base = 0;
449
450 /* Reset response buffer and release
451 * the reservation.
452 * But first, check that enough space was reserved
453 * for the reply, otherwise we have a bug!
454 */
455 if ((rqstp->rq_res.len) > rqstp->rq_reserved)
456 printk(KERN_ERR "RPC request reserved %d but used %d\n",
457 rqstp->rq_reserved,
458 rqstp->rq_res.len);
459
460 rqstp->rq_res.head[0].iov_len = 0;
461 svc_reserve(rqstp, 0);
462 rqstp->rq_xprt = NULL;
463
464 svc_xprt_put(xprt);
465}
466
467/*
468 * External function to wake up a server waiting for data
469 * This really only makes sense for services like lockd
470 * which have exactly one thread anyway.
471 */
472void svc_wake_up(struct svc_serv *serv)
473{
474 struct svc_rqst *rqstp;
475 unsigned int i;
476 struct svc_pool *pool;
477
478 for (i = 0; i < serv->sv_nrpools; i++) {
479 pool = &serv->sv_pools[i];
480
481 spin_lock_bh(&pool->sp_lock);
482 if (!list_empty(&pool->sp_threads)) {
483 rqstp = list_entry(pool->sp_threads.next,
484 struct svc_rqst,
485 rq_list);
486 dprintk("svc: daemon %p woken up.\n", rqstp);
487 /*
488 svc_thread_dequeue(pool, rqstp);
489 rqstp->rq_xprt = NULL;
490 */
491 wake_up(&rqstp->rq_wait);
492 }
493 spin_unlock_bh(&pool->sp_lock);
494 }
495}
496EXPORT_SYMBOL(svc_wake_up);
497
498int svc_port_is_privileged(struct sockaddr *sin)
499{
500 switch (sin->sa_family) {
501 case AF_INET:
502 return ntohs(((struct sockaddr_in *)sin)->sin_port)
503 < PROT_SOCK;
504 case AF_INET6:
505 return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
506 < PROT_SOCK;
507 default:
508 return 0;
509 }
510}
511
512/*
513 * Make sure that we don't have too many active connections. If we
514 * have, something must be dropped.
515 *
516 * There's no point in trying to do random drop here for DoS
517 * prevention. The NFS clients does 1 reconnect in 15 seconds. An
518 * attacker can easily beat that.
519 *
520 * The only somewhat efficient mechanism would be if drop old
521 * connections from the same IP first. But right now we don't even
522 * record the client IP in svc_sock.
523 */
524static void svc_check_conn_limits(struct svc_serv *serv)
525{
526 if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
527 struct svc_xprt *xprt = NULL;
528 spin_lock_bh(&serv->sv_lock);
529 if (!list_empty(&serv->sv_tempsocks)) {
530 if (net_ratelimit()) {
531 /* Try to help the admin */
532 printk(KERN_NOTICE "%s: too many open "
533 "connections, consider increasing the "
534 "number of nfsd threads\n",
535 serv->sv_name);
536 }
537 /*
538 * Always select the oldest connection. It's not fair,
539 * but so is life
540 */
541 xprt = list_entry(serv->sv_tempsocks.prev,
542 struct svc_xprt,
543 xpt_list);
544 set_bit(XPT_CLOSE, &xprt->xpt_flags);
545 svc_xprt_get(xprt);
546 }
547 spin_unlock_bh(&serv->sv_lock);
548
549 if (xprt) {
550 svc_xprt_enqueue(xprt);
551 svc_xprt_put(xprt);
552 }
553 }
554}
555
556/*
557 * Receive the next request on any transport. This code is carefully
558 * organised not to touch any cachelines in the shared svc_serv
559 * structure, only cachelines in the local svc_pool.
560 */
561int svc_recv(struct svc_rqst *rqstp, long timeout)
562{
563 struct svc_xprt *xprt = NULL;
564 struct svc_serv *serv = rqstp->rq_server;
565 struct svc_pool *pool = rqstp->rq_pool;
566 int len, i;
567 int pages;
568 struct xdr_buf *arg;
569 DECLARE_WAITQUEUE(wait, current);
570
571 dprintk("svc: server %p waiting for data (to = %ld)\n",
572 rqstp, timeout);
573
574 if (rqstp->rq_xprt)
575 printk(KERN_ERR
576 "svc_recv: service %p, transport not NULL!\n",
577 rqstp);
578 if (waitqueue_active(&rqstp->rq_wait))
579 printk(KERN_ERR
580 "svc_recv: service %p, wait queue active!\n",
581 rqstp);
582
583 /* now allocate needed pages. If we get a failure, sleep briefly */
584 pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
585 for (i = 0; i < pages ; i++)
586 while (rqstp->rq_pages[i] == NULL) {
587 struct page *p = alloc_page(GFP_KERNEL);
588 if (!p) {
589 int j = msecs_to_jiffies(500);
590 schedule_timeout_uninterruptible(j);
591 }
592 rqstp->rq_pages[i] = p;
593 }
594 rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
595 BUG_ON(pages >= RPCSVC_MAXPAGES);
596
597 /* Make arg->head point to first page and arg->pages point to rest */
598 arg = &rqstp->rq_arg;
599 arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
600 arg->head[0].iov_len = PAGE_SIZE;
601 arg->pages = rqstp->rq_pages + 1;
602 arg->page_base = 0;
603 /* save at least one page for response */
604 arg->page_len = (pages-2)*PAGE_SIZE;
605 arg->len = (pages-1)*PAGE_SIZE;
606 arg->tail[0].iov_len = 0;
607
608 try_to_freeze();
609 cond_resched();
610 if (signalled())
611 return -EINTR;
612
613 spin_lock_bh(&pool->sp_lock);
614 xprt = svc_xprt_dequeue(pool);
615 if (xprt) {
616 rqstp->rq_xprt = xprt;
617 svc_xprt_get(xprt);
618 rqstp->rq_reserved = serv->sv_max_mesg;
619 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
620 } else {
621 /* No data pending. Go to sleep */
622 svc_thread_enqueue(pool, rqstp);
623
624 /*
625 * We have to be able to interrupt this wait
626 * to bring down the daemons ...
627 */
628 set_current_state(TASK_INTERRUPTIBLE);
629 add_wait_queue(&rqstp->rq_wait, &wait);
630 spin_unlock_bh(&pool->sp_lock);
631
632 schedule_timeout(timeout);
633
634 try_to_freeze();
635
636 spin_lock_bh(&pool->sp_lock);
637 remove_wait_queue(&rqstp->rq_wait, &wait);
638
639 xprt = rqstp->rq_xprt;
640 if (!xprt) {
641 svc_thread_dequeue(pool, rqstp);
642 spin_unlock_bh(&pool->sp_lock);
643 dprintk("svc: server %p, no data yet\n", rqstp);
644 return signalled()? -EINTR : -EAGAIN;
645 }
646 }
647 spin_unlock_bh(&pool->sp_lock);
648
649 len = 0;
650 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
651 dprintk("svc_recv: found XPT_CLOSE\n");
652 svc_delete_xprt(xprt);
653 } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
654 struct svc_xprt *newxpt;
655 newxpt = xprt->xpt_ops->xpo_accept(xprt);
656 if (newxpt) {
657 /*
658 * We know this module_get will succeed because the
659 * listener holds a reference too
660 */
661 __module_get(newxpt->xpt_class->xcl_owner);
662 svc_check_conn_limits(xprt->xpt_server);
663 spin_lock_bh(&serv->sv_lock);
664 set_bit(XPT_TEMP, &newxpt->xpt_flags);
665 list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
666 serv->sv_tmpcnt++;
667 if (serv->sv_temptimer.function == NULL) {
668 /* setup timer to age temp transports */
669 setup_timer(&serv->sv_temptimer,
670 svc_age_temp_xprts,
671 (unsigned long)serv);
672 mod_timer(&serv->sv_temptimer,
673 jiffies + svc_conn_age_period * HZ);
674 }
675 spin_unlock_bh(&serv->sv_lock);
676 svc_xprt_received(newxpt);
677 }
678 svc_xprt_received(xprt);
679 } else {
680 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
681 rqstp, pool->sp_id, xprt,
682 atomic_read(&xprt->xpt_ref.refcount));
683 rqstp->rq_deferred = svc_deferred_dequeue(xprt);
684 if (rqstp->rq_deferred) {
685 svc_xprt_received(xprt);
686 len = svc_deferred_recv(rqstp);
687 } else
688 len = xprt->xpt_ops->xpo_recvfrom(rqstp);
689 dprintk("svc: got len=%d\n", len);
690 }
691
692 /* No data, incomplete (TCP) read, or accept() */
693 if (len == 0 || len == -EAGAIN) {
694 rqstp->rq_res.len = 0;
695 svc_xprt_release(rqstp);
696 return -EAGAIN;
697 }
698 clear_bit(XPT_OLD, &xprt->xpt_flags);
699
700 rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
701 rqstp->rq_chandle.defer = svc_defer;
702
703 if (serv->sv_stats)
704 serv->sv_stats->netcnt++;
705 return len;
706}
707EXPORT_SYMBOL(svc_recv);
708
709/*
710 * Drop request
711 */
712void svc_drop(struct svc_rqst *rqstp)
713{
714 dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
715 svc_xprt_release(rqstp);
716}
717EXPORT_SYMBOL(svc_drop);
718
/*
 * Return reply to client.
 *
 * Computes the total reply length, serializes the transmit against
 * other senders on the same transport, and releases the request.
 * Returns the number of bytes sent, or 0 for "connection-level"
 * errors the caller should not see (-ECONNREFUSED/-ENOTCONN/-EAGAIN).
 */
int svc_send(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct xdr_buf *resbuf;
	int status;

	if (xprt == NULL)
		return -EFAULT;

	/* release the receive skb before sending the reply */
	xprt->xpt_ops->xpo_release_rqst(rqstp);

	/* calculate over-all length */
	resbuf = &rqstp->rq_res;
	resbuf->len = resbuf->head[0].iov_len
		    + resbuf->page_len
		    + resbuf->tail[0].iov_len;

	/* Grab mutex to serialize outgoing data. */
	mutex_lock(&xprt->xpt_mutex);
	status = test_bit(XPT_DEAD, &xprt->xpt_flags)
		 ? -ENOTCONN
		 : xprt->xpt_ops->xpo_sendto(rqstp);
	mutex_unlock(&xprt->xpt_mutex);
	svc_xprt_release(rqstp);

	switch (status) {
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EAGAIN:
		return 0;
	}
	return status;
}
754
/*
 * Timer function to close old temporary transports, using
 * a mark-and-sweep algorithm.
 *
 * Each period, every temp transport is marked XPT_OLD; one that is
 * still marked when the timer next fires has been idle for a full
 * period and is queued for closing.  Runs in timer context, so
 * sv_lock is only try-locked.
 */
static void svc_age_temp_xprts(unsigned long closure)
{
	struct svc_serv *serv = (struct svc_serv *)closure;
	struct svc_xprt *xprt;
	struct list_head *le, *next;
	LIST_HEAD(to_be_aged);

	dprintk("svc_age_temp_xprts\n");

	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_xprts: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		xprt = list_entry(le, struct svc_xprt, xpt_list);

		/* First time through, just mark it OLD. Second time
		 * through, close it. */
		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
			continue;
		/* Skip transports still referenced elsewhere or being
		 * processed by a thread. */
		if (atomic_read(&xprt->xpt_ref.refcount) > 1
		    || test_bit(XPT_BUSY, &xprt->xpt_flags))
			continue;
		/* Hold a reference while the xprt sits on the local list. */
		svc_xprt_get(xprt);
		list_move(le, &to_be_aged);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		/* XPT_DETACHED records that xpt_list is no longer on a
		 * server list (see svc_delete_xprt). */
		set_bit(XPT_DETACHED, &xprt->xpt_flags);
	}
	spin_unlock_bh(&serv->sv_lock);

	while (!list_empty(&to_be_aged)) {
		le = to_be_aged.next;
		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
		list_del_init(le);
		xprt = list_entry(le, struct svc_xprt, xpt_list);

		dprintk("queuing xprt %p for closing\n", xprt);

		/* a thread will dequeue and close it soon */
		svc_xprt_enqueue(xprt);
		/* drop the reference taken under sv_lock above */
		svc_xprt_put(xprt);
	}

	/* Re-arm for the next aging period. */
	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
807
/*
 * Remove a dead transport
 *
 * Detaches the transport from event delivery, unlinks it from the
 * server's socket list (unless already detached by the aging timer),
 * marks it XPT_DEAD and drops the list's reference exactly once.
 */
void svc_delete_xprt(struct svc_xprt *xprt)
{
	struct svc_serv *serv = xprt->xpt_server;

	dprintk("svc: svc_delete_xprt(%p)\n", xprt);
	/* Stop the transport class from delivering further events. */
	xprt->xpt_ops->xpo_detach(xprt);

	spin_lock_bh(&serv->sv_lock);
	/* Only unlink if nobody (e.g. svc_age_temp_xprts) detached it. */
	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
		list_del_init(&xprt->xpt_list);
	/*
	 * We used to delete the transport from whichever list
	 * it's sk_xprt.xpt_ready node was on, but we don't actually
	 * need to. This is because the only time we're called
	 * while still attached to a queue, the queue itself
	 * is about to be destroyed (in svc_destroy).
	 */
	if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
		/* At least two references must remain at this point. */
		BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
		if (test_bit(XPT_TEMP, &xprt->xpt_flags))
			serv->sv_tmpcnt--;
		/* Drop the reference held while on the server's list;
		 * XPT_DEAD guarantees this happens only once. */
		svc_xprt_put(xprt);
	}
	spin_unlock_bh(&serv->sv_lock);
}
836
/*
 * Mark a transport for closing and delete it, unless another thread
 * currently owns it (XPT_BUSY) — in that case that thread will see
 * XPT_CLOSE and perform the close itself.
 */
void svc_close_xprt(struct svc_xprt *xprt)
{
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		/* someone else will have to effect the close */
		return;

	/* Hold a reference so the xprt cannot be freed before we
	 * clear XPT_BUSY below. */
	svc_xprt_get(xprt);
	svc_delete_xprt(xprt);
	clear_bit(XPT_BUSY, &xprt->xpt_flags);
	svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_close_xprt);
850
/*
 * Close every transport on the given list (sv_permsocks or
 * sv_tempsocks), used when no server threads remain to process
 * transports that are still queued.
 */
void svc_close_all(struct list_head *xprt_list)
{
	struct svc_xprt *xprt;
	struct svc_xprt *tmp;

	/* _safe variant: svc_close_xprt() unlinks entries as we walk. */
	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
			/* Waiting to be processed, but no threads left,
			 * So just remove it from the waiting list
			 */
			list_del_init(&xprt->xpt_ready);
			/* Clear BUSY so svc_close_xprt() can proceed. */
			clear_bit(XPT_BUSY, &xprt->xpt_flags);
		}
		svc_close_xprt(xprt);
	}
}
868
/*
 * Handle defer and revisit of requests
 */

/*
 * Cache callback: a previously deferred request is ready to be
 * reprocessed (or, if @too_many, discarded).  Either way the
 * transport reference taken in svc_defer() is dropped here.
 */
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
{
	struct svc_deferred_req *dr =
		container_of(dreq, struct svc_deferred_req, handle);
	struct svc_xprt *xprt = dr->xprt;

	if (too_many) {
		/* Cache overloaded: throw the deferred request away. */
		svc_xprt_put(xprt);
		kfree(dr);
		return;
	}
	dprintk("revisit queued\n");
	dr->xprt = NULL;
	/* Queue on the transport's deferred list and wake a thread. */
	spin_lock(&xprt->xpt_lock);
	list_add(&dr->handle.recent, &xprt->xpt_deferred);
	spin_unlock(&xprt->xpt_lock);
	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
}
893
/*
 * Save the request off for later processing. The request buffer looks
 * like this:
 *
 * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
 *
 * This code can only handle requests that consist of an xprt-header
 * and rpc-header.
 *
 * Returns a handle the cache code passes back to svc_revisit() later,
 * or NULL if the request cannot be deferred.
 */
static struct cache_deferred_req *svc_defer(struct cache_req *req)
{
	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
	struct svc_deferred_req *dr;

	if (rqstp->rq_arg.page_len)
		return NULL; /* if more than a page, give up FIXME */
	if (rqstp->rq_deferred) {
		/* This request was itself a revisited deferral; reuse
		 * its existing record rather than copying again. */
		dr = rqstp->rq_deferred;
		rqstp->rq_deferred = NULL;
	} else {
		size_t skip;
		size_t size;
		/* FIXME maybe discard if size too large */
		size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
		dr = kmalloc(size, GFP_KERNEL);
		if (dr == NULL)
			return NULL;

		dr->handle.owner = rqstp->rq_server;
		dr->prot = rqstp->rq_prot;
		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
		dr->addrlen = rqstp->rq_addrlen;
		dr->daddr = rqstp->rq_daddr;
		/* argslen is stored in 32-bit words (len >> 2). */
		dr->argslen = rqstp->rq_arg.len >> 2;
		dr->xprt_hlen = rqstp->rq_xprt_hlen;

		/* back up head to the start of the buffer and copy */
		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
		       dr->argslen << 2);
	}
	/* Pin the transport until svc_revisit() releases it. */
	svc_xprt_get(rqstp->rq_xprt);
	dr->xprt = rqstp->rq_xprt;

	dr->handle.revisit = svc_revisit;
	return &dr->handle;
}
941
/*
 * recv data from a deferred request into an active one
 *
 * Rebuilds rqstp's receive buffer and peer address from the saved
 * svc_deferred_req, and returns the payload length excluding the
 * transport header.
 */
static int svc_deferred_recv(struct svc_rqst *rqstp)
{
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	/* setup iov_base past transport header */
	rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
	/* The iov_len does not include the transport header bytes */
	rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
	rqstp->rq_arg.page_len = 0;
	/* The rq_arg.len includes the transport header bytes */
	rqstp->rq_arg.len = dr->argslen<<2;
	rqstp->rq_prot = dr->prot;
	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
	rqstp->rq_addrlen = dr->addrlen;
	/* Save off transport header len in case we get deferred again */
	rqstp->rq_xprt_hlen = dr->xprt_hlen;
	rqstp->rq_daddr = dr->daddr;
	rqstp->rq_respages = rqstp->rq_pages;
	return (dr->argslen<<2) - dr->xprt_hlen;
}
965
966
/*
 * Pop the oldest deferred request queued on this transport, if any.
 */
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
{
	struct svc_deferred_req *dr = NULL;

	/* Fast path: skip the lock when nothing is queued. */
	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
		return NULL;
	spin_lock(&xprt->xpt_lock);
	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
	if (!list_empty(&xprt->xpt_deferred)) {
		dr = list_entry(xprt->xpt_deferred.next,
				struct svc_deferred_req,
				handle.recent);
		list_del_init(&dr->handle.recent);
		/* Re-set the flag so any remaining deferred requests
		 * get the transport polled again. */
		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
	}
	spin_unlock(&xprt->xpt_lock);
	return dr;
}
985
986/*
987 * Return the transport instance pointer for the endpoint accepting
988 * connections/peer traffic from the specified transport class,
989 * address family and port.
990 *
991 * Specifying 0 for the address family or port is effectively a
992 * wild-card, and will result in matching the first transport in the
993 * service's list that has a matching class name.
994 */
995struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
996 int af, int port)
997{
998 struct svc_xprt *xprt;
999 struct svc_xprt *found = NULL;
1000
1001 /* Sanity check the args */
1002 if (!serv || !xcl_name)
1003 return found;
1004
1005 spin_lock_bh(&serv->sv_lock);
1006 list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
1007 if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
1008 continue;
1009 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
1010 continue;
1011 if (port && port != svc_xprt_local_port(xprt))
1012 continue;
1013 found = xprt;
1014 svc_xprt_get(xprt);
1015 break;
1016 }
1017 spin_unlock_bh(&serv->sv_lock);
1018 return found;
1019}
1020EXPORT_SYMBOL_GPL(svc_find_xprt);
1021
1022/*
1023 * Format a buffer with a list of the active transports. A zero for
1024 * the buflen parameter disables target buffer overflow checking.
1025 */
1026int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
1027{
1028 struct svc_xprt *xprt;
1029 char xprt_str[64];
1030 int totlen = 0;
1031 int len;
1032
1033 /* Sanity check args */
1034 if (!serv)
1035 return 0;
1036
1037 spin_lock_bh(&serv->sv_lock);
1038 list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
1039 len = snprintf(xprt_str, sizeof(xprt_str),
1040 "%s %d\n", xprt->xpt_class->xcl_name,
1041 svc_xprt_local_port(xprt));
1042 /* If the string was truncated, replace with error string */
1043 if (len >= sizeof(xprt_str))
1044 strcpy(xprt_str, "name-too-long\n");
1045 /* Don't overflow buffer */
1046 len = strlen(xprt_str);
1047 if (buflen && (len + totlen >= buflen))
1048 break;
1049 strcpy(buf+totlen, xprt_str);
1050 totlen += len;
1051 }
1052 spin_unlock_bh(&serv->sv_lock);
1053 return totlen;
1054}
1055EXPORT_SYMBOL_GPL(svc_xprt_names);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index af7c5f05c6e1..8a73cbb16052 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
57 rqstp->rq_authop = aops; 57 rqstp->rq_authop = aops;
58 return aops->accept(rqstp, authp); 58 return aops->accept(rqstp, authp);
59} 59}
60EXPORT_SYMBOL(svc_authenticate);
60 61
61int svc_set_client(struct svc_rqst *rqstp) 62int svc_set_client(struct svc_rqst *rqstp)
62{ 63{
63 return rqstp->rq_authop->set_client(rqstp); 64 return rqstp->rq_authop->set_client(rqstp);
64} 65}
66EXPORT_SYMBOL(svc_set_client);
65 67
66/* A request, which was authenticated, has now executed. 68/* A request, which was authenticated, has now executed.
67 * Time to finalise the credentials and verifier 69 * Time to finalise the credentials and verifier
@@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
93 spin_unlock(&authtab_lock); 95 spin_unlock(&authtab_lock);
94 return rv; 96 return rv;
95} 97}
98EXPORT_SYMBOL(svc_auth_register);
96 99
97void 100void
98svc_auth_unregister(rpc_authflavor_t flavor) 101svc_auth_unregister(rpc_authflavor_t flavor)
@@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom)
129 spin_unlock(&auth_domain_lock); 132 spin_unlock(&auth_domain_lock);
130 } 133 }
131} 134}
135EXPORT_SYMBOL(auth_domain_put);
132 136
133struct auth_domain * 137struct auth_domain *
134auth_domain_lookup(char *name, struct auth_domain *new) 138auth_domain_lookup(char *name, struct auth_domain *new)
@@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new)
153 spin_unlock(&auth_domain_lock); 157 spin_unlock(&auth_domain_lock);
154 return new; 158 return new;
155} 159}
160EXPORT_SYMBOL(auth_domain_lookup);
156 161
157struct auth_domain *auth_domain_find(char *name) 162struct auth_domain *auth_domain_find(char *name)
158{ 163{
159 return auth_domain_lookup(name, NULL); 164 return auth_domain_lookup(name, NULL);
160} 165}
166EXPORT_SYMBOL(auth_domain_find);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 411479411b21..3c64051e4555 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name)
63 rv = auth_domain_lookup(name, &new->h); 63 rv = auth_domain_lookup(name, &new->h);
64 } 64 }
65} 65}
66EXPORT_SYMBOL(unix_domain_find);
66 67
67static void svcauth_unix_domain_release(struct auth_domain *dom) 68static void svcauth_unix_domain_release(struct auth_domain *dom)
68{ 69{
@@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
340 else 341 else
341 return -ENOMEM; 342 return -ENOMEM;
342} 343}
344EXPORT_SYMBOL(auth_unix_add_addr);
343 345
344int auth_unix_forget_old(struct auth_domain *dom) 346int auth_unix_forget_old(struct auth_domain *dom)
345{ 347{
@@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
351 udom->addr_changes++; 353 udom->addr_changes++;
352 return 0; 354 return 0;
353} 355}
356EXPORT_SYMBOL(auth_unix_forget_old);
354 357
355struct auth_domain *auth_unix_lookup(struct in_addr addr) 358struct auth_domain *auth_unix_lookup(struct in_addr addr)
356{ 359{
@@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
375 cache_put(&ipm->h, &ip_map_cache); 378 cache_put(&ipm->h, &ip_map_cache);
376 return rv; 379 return rv;
377} 380}
381EXPORT_SYMBOL(auth_unix_lookup);
378 382
379void svcauth_unix_purge(void) 383void svcauth_unix_purge(void)
380{ 384{
381 cache_purge(&ip_map_cache); 385 cache_purge(&ip_map_cache);
382} 386}
387EXPORT_SYMBOL(svcauth_unix_purge);
383 388
384static inline struct ip_map * 389static inline struct ip_map *
385ip_map_cached_get(struct svc_rqst *rqstp) 390ip_map_cached_get(struct svc_rqst *rqstp)
386{ 391{
387 struct ip_map *ipm; 392 struct ip_map *ipm = NULL;
388 struct svc_sock *svsk = rqstp->rq_sock; 393 struct svc_xprt *xprt = rqstp->rq_xprt;
389 spin_lock(&svsk->sk_lock); 394
390 ipm = svsk->sk_info_authunix; 395 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
391 if (ipm != NULL) { 396 spin_lock(&xprt->xpt_lock);
392 if (!cache_valid(&ipm->h)) { 397 ipm = xprt->xpt_auth_cache;
393 /* 398 if (ipm != NULL) {
394 * The entry has been invalidated since it was 399 if (!cache_valid(&ipm->h)) {
395 * remembered, e.g. by a second mount from the 400 /*
396 * same IP address. 401 * The entry has been invalidated since it was
397 */ 402 * remembered, e.g. by a second mount from the
398 svsk->sk_info_authunix = NULL; 403 * same IP address.
399 spin_unlock(&svsk->sk_lock); 404 */
400 cache_put(&ipm->h, &ip_map_cache); 405 xprt->xpt_auth_cache = NULL;
401 return NULL; 406 spin_unlock(&xprt->xpt_lock);
407 cache_put(&ipm->h, &ip_map_cache);
408 return NULL;
409 }
410 cache_get(&ipm->h);
402 } 411 }
403 cache_get(&ipm->h); 412 spin_unlock(&xprt->xpt_lock);
404 } 413 }
405 spin_unlock(&svsk->sk_lock);
406 return ipm; 414 return ipm;
407} 415}
408 416
409static inline void 417static inline void
410ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 418ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
411{ 419{
412 struct svc_sock *svsk = rqstp->rq_sock; 420 struct svc_xprt *xprt = rqstp->rq_xprt;
413 421
414 spin_lock(&svsk->sk_lock); 422 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
415 if (svsk->sk_sock->type == SOCK_STREAM && 423 spin_lock(&xprt->xpt_lock);
416 svsk->sk_info_authunix == NULL) { 424 if (xprt->xpt_auth_cache == NULL) {
417 /* newly cached, keep the reference */ 425 /* newly cached, keep the reference */
418 svsk->sk_info_authunix = ipm; 426 xprt->xpt_auth_cache = ipm;
419 ipm = NULL; 427 ipm = NULL;
428 }
429 spin_unlock(&xprt->xpt_lock);
420 } 430 }
421 spin_unlock(&svsk->sk_lock);
422 if (ipm) 431 if (ipm)
423 cache_put(&ipm->h, &ip_map_cache); 432 cache_put(&ipm->h, &ip_map_cache);
424} 433}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c75bffeb89eb..1d3e5fcc2cc4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * The server scheduling algorithm does not always distribute the load 6 * The server scheduling algorithm does not always distribute the load
7 * evenly when servicing a single client. May need to modify the 7 * evenly when servicing a single client. May need to modify the
8 * svc_sock_enqueue procedure... 8 * svc_xprt_enqueue procedure...
9 * 9 *
10 * TCP support is largely untested and may be a little slow. The problem 10 * TCP support is largely untested and may be a little slow. The problem
11 * is that we currently do two separate recvfrom's, one for the 4-byte 11 * is that we currently do two separate recvfrom's, one for the 4-byte
@@ -48,72 +48,40 @@
48#include <linux/sunrpc/svcsock.h> 48#include <linux/sunrpc/svcsock.h>
49#include <linux/sunrpc/stats.h> 49#include <linux/sunrpc/stats.h>
50 50
51/* SMP locking strategy: 51#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 *
53 * svc_pool->sp_lock protects most of the fields of that pool.
54 * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
55 * when both need to be taken (rare), svc_serv->sv_lock is first.
56 * BKL protects svc_serv->sv_nrthread.
57 * svc_sock->sk_lock protects the svc_sock->sk_deferred list
58 * and the ->sk_info_authunix cache.
59 * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
60 *
61 * Some flags can be set to certain values at any time
62 * providing that certain rules are followed:
63 *
64 * SK_CONN, SK_DATA, can be set or cleared at any time.
65 * after a set, svc_sock_enqueue must be called.
66 * after a clear, the socket must be read/accepted
67 * if this succeeds, it must be set again.
68 * SK_CLOSE can set at any time. It is never cleared.
69 * sk_inuse contains a bias of '1' until SK_DEAD is set.
70 * so when sk_inuse hits zero, we know the socket is dead
71 * and no-one is using it.
72 * SK_DEAD can only be set while SK_BUSY is held which ensures
73 * no other thread will be using the socket or will try to
74 * set SK_DEAD.
75 *
76 */
77
78#define RPCDBG_FACILITY RPCDBG_SVCSOCK
79 52
80 53
81static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, 54static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
82 int *errp, int flags); 55 int *errp, int flags);
83static void svc_delete_socket(struct svc_sock *svsk);
84static void svc_udp_data_ready(struct sock *, int); 56static void svc_udp_data_ready(struct sock *, int);
85static int svc_udp_recvfrom(struct svc_rqst *); 57static int svc_udp_recvfrom(struct svc_rqst *);
86static int svc_udp_sendto(struct svc_rqst *); 58static int svc_udp_sendto(struct svc_rqst *);
87static void svc_close_socket(struct svc_sock *svsk); 59static void svc_sock_detach(struct svc_xprt *);
88 60static void svc_sock_free(struct svc_xprt *);
89static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
90static int svc_deferred_recv(struct svc_rqst *rqstp);
91static struct cache_deferred_req *svc_defer(struct cache_req *req);
92
93/* apparently the "standard" is that clients close
94 * idle connections after 5 minutes, servers after
95 * 6 minutes
96 * http://www.connectathon.org/talks96/nfstcp.pdf
97 */
98static int svc_conn_age_period = 6*60;
99 61
62static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
63 struct sockaddr *, int, int);
100#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
101static struct lock_class_key svc_key[2]; 65static struct lock_class_key svc_key[2];
102static struct lock_class_key svc_slock_key[2]; 66static struct lock_class_key svc_slock_key[2];
103 67
104static inline void svc_reclassify_socket(struct socket *sock) 68static void svc_reclassify_socket(struct socket *sock)
105{ 69{
106 struct sock *sk = sock->sk; 70 struct sock *sk = sock->sk;
107 BUG_ON(sock_owned_by_user(sk)); 71 BUG_ON(sock_owned_by_user(sk));
108 switch (sk->sk_family) { 72 switch (sk->sk_family) {
109 case AF_INET: 73 case AF_INET:
110 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", 74 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
111 &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); 75 &svc_slock_key[0],
76 "sk_xprt.xpt_lock-AF_INET-NFSD",
77 &svc_key[0]);
112 break; 78 break;
113 79
114 case AF_INET6: 80 case AF_INET6:
115 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", 81 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
116 &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); 82 &svc_slock_key[1],
83 "sk_xprt.xpt_lock-AF_INET6-NFSD",
84 &svc_key[1]);
117 break; 85 break;
118 86
119 default: 87 default:
@@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock)
121 } 89 }
122} 90}
123#else 91#else
124static inline void svc_reclassify_socket(struct socket *sock) 92static void svc_reclassify_socket(struct socket *sock)
125{ 93{
126} 94}
127#endif 95#endif
128 96
129static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
130{
131 switch (addr->sa_family) {
132 case AF_INET:
133 snprintf(buf, len, "%u.%u.%u.%u, port=%u",
134 NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
135 ntohs(((struct sockaddr_in *) addr)->sin_port));
136 break;
137
138 case AF_INET6:
139 snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
140 NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
141 ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
142 break;
143
144 default:
145 snprintf(buf, len, "unknown address type: %d", addr->sa_family);
146 break;
147 }
148 return buf;
149}
150
151/**
152 * svc_print_addr - Format rq_addr field for printing
153 * @rqstp: svc_rqst struct containing address to print
154 * @buf: target buffer for formatted address
155 * @len: length of target buffer
156 *
157 */
158char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
159{
160 return __svc_print_addr(svc_addr(rqstp), buf, len);
161}
162EXPORT_SYMBOL_GPL(svc_print_addr);
163
164/*
165 * Queue up an idle server thread. Must have pool->sp_lock held.
166 * Note: this is really a stack rather than a queue, so that we only
167 * use as many different threads as we need, and the rest don't pollute
168 * the cache.
169 */
170static inline void
171svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
172{
173 list_add(&rqstp->rq_list, &pool->sp_threads);
174}
175
176/*
177 * Dequeue an nfsd thread. Must have pool->sp_lock held.
178 */
179static inline void
180svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
181{
182 list_del(&rqstp->rq_list);
183}
184
185/* 97/*
186 * Release an skbuff after use 98 * Release an skbuff after use
187 */ 99 */
188static inline void 100static void svc_release_skb(struct svc_rqst *rqstp)
189svc_release_skb(struct svc_rqst *rqstp)
190{ 101{
191 struct sk_buff *skb = rqstp->rq_skbuff; 102 struct sk_buff *skb = rqstp->rq_xprt_ctxt;
192 struct svc_deferred_req *dr = rqstp->rq_deferred; 103 struct svc_deferred_req *dr = rqstp->rq_deferred;
193 104
194 if (skb) { 105 if (skb) {
195 rqstp->rq_skbuff = NULL; 106 struct svc_sock *svsk =
107 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
108 rqstp->rq_xprt_ctxt = NULL;
196 109
197 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); 110 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
198 skb_free_datagram(rqstp->rq_sock->sk_sk, skb); 111 skb_free_datagram(svsk->sk_sk, skb);
199 } 112 }
200 if (dr) { 113 if (dr) {
201 rqstp->rq_deferred = NULL; 114 rqstp->rq_deferred = NULL;
@@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp)
203 } 116 }
204} 117}
205 118
206/*
207 * Any space to write?
208 */
209static inline unsigned long
210svc_sock_wspace(struct svc_sock *svsk)
211{
212 int wspace;
213
214 if (svsk->sk_sock->type == SOCK_STREAM)
215 wspace = sk_stream_wspace(svsk->sk_sk);
216 else
217 wspace = sock_wspace(svsk->sk_sk);
218
219 return wspace;
220}
221
222/*
223 * Queue up a socket with data pending. If there are idle nfsd
224 * processes, wake 'em up.
225 *
226 */
227static void
228svc_sock_enqueue(struct svc_sock *svsk)
229{
230 struct svc_serv *serv = svsk->sk_server;
231 struct svc_pool *pool;
232 struct svc_rqst *rqstp;
233 int cpu;
234
235 if (!(svsk->sk_flags &
236 ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
237 return;
238 if (test_bit(SK_DEAD, &svsk->sk_flags))
239 return;
240
241 cpu = get_cpu();
242 pool = svc_pool_for_cpu(svsk->sk_server, cpu);
243 put_cpu();
244
245 spin_lock_bh(&pool->sp_lock);
246
247 if (!list_empty(&pool->sp_threads) &&
248 !list_empty(&pool->sp_sockets))
249 printk(KERN_ERR
250 "svc_sock_enqueue: threads and sockets both waiting??\n");
251
252 if (test_bit(SK_DEAD, &svsk->sk_flags)) {
253 /* Don't enqueue dead sockets */
254 dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
255 goto out_unlock;
256 }
257
258 /* Mark socket as busy. It will remain in this state until the
259 * server has processed all pending data and put the socket back
260 * on the idle list. We update SK_BUSY atomically because
261 * it also guards against trying to enqueue the svc_sock twice.
262 */
263 if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
264 /* Don't enqueue socket while already enqueued */
265 dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
266 goto out_unlock;
267 }
268 BUG_ON(svsk->sk_pool != NULL);
269 svsk->sk_pool = pool;
270
271 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
272 if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
273 > svc_sock_wspace(svsk))
274 && !test_bit(SK_CLOSE, &svsk->sk_flags)
275 && !test_bit(SK_CONN, &svsk->sk_flags)) {
276 /* Don't enqueue while not enough space for reply */
277 dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
278 svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
279 svc_sock_wspace(svsk));
280 svsk->sk_pool = NULL;
281 clear_bit(SK_BUSY, &svsk->sk_flags);
282 goto out_unlock;
283 }
284 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
285
286
287 if (!list_empty(&pool->sp_threads)) {
288 rqstp = list_entry(pool->sp_threads.next,
289 struct svc_rqst,
290 rq_list);
291 dprintk("svc: socket %p served by daemon %p\n",
292 svsk->sk_sk, rqstp);
293 svc_thread_dequeue(pool, rqstp);
294 if (rqstp->rq_sock)
295 printk(KERN_ERR
296 "svc_sock_enqueue: server %p, rq_sock=%p!\n",
297 rqstp, rqstp->rq_sock);
298 rqstp->rq_sock = svsk;
299 atomic_inc(&svsk->sk_inuse);
300 rqstp->rq_reserved = serv->sv_max_mesg;
301 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
302 BUG_ON(svsk->sk_pool != pool);
303 wake_up(&rqstp->rq_wait);
304 } else {
305 dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
306 list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
307 BUG_ON(svsk->sk_pool != pool);
308 }
309
310out_unlock:
311 spin_unlock_bh(&pool->sp_lock);
312}
313
314/*
315 * Dequeue the first socket. Must be called with the pool->sp_lock held.
316 */
317static inline struct svc_sock *
318svc_sock_dequeue(struct svc_pool *pool)
319{
320 struct svc_sock *svsk;
321
322 if (list_empty(&pool->sp_sockets))
323 return NULL;
324
325 svsk = list_entry(pool->sp_sockets.next,
326 struct svc_sock, sk_ready);
327 list_del_init(&svsk->sk_ready);
328
329 dprintk("svc: socket %p dequeued, inuse=%d\n",
330 svsk->sk_sk, atomic_read(&svsk->sk_inuse));
331
332 return svsk;
333}
334
335/*
336 * Having read something from a socket, check whether it
337 * needs to be re-enqueued.
338 * Note: SK_DATA only gets cleared when a read-attempt finds
339 * no (or insufficient) data.
340 */
341static inline void
342svc_sock_received(struct svc_sock *svsk)
343{
344 svsk->sk_pool = NULL;
345 clear_bit(SK_BUSY, &svsk->sk_flags);
346 svc_sock_enqueue(svsk);
347}
348
349
350/**
351 * svc_reserve - change the space reserved for the reply to a request.
352 * @rqstp: The request in question
353 * @space: new max space to reserve
354 *
355 * Each request reserves some space on the output queue of the socket
356 * to make sure the reply fits. This function reduces that reserved
357 * space to be the amount of space used already, plus @space.
358 *
359 */
360void svc_reserve(struct svc_rqst *rqstp, int space)
361{
362 space += rqstp->rq_res.head[0].iov_len;
363
364 if (space < rqstp->rq_reserved) {
365 struct svc_sock *svsk = rqstp->rq_sock;
366 atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
367 rqstp->rq_reserved = space;
368
369 svc_sock_enqueue(svsk);
370 }
371}
372
373/*
374 * Release a socket after use.
375 */
376static inline void
377svc_sock_put(struct svc_sock *svsk)
378{
379 if (atomic_dec_and_test(&svsk->sk_inuse)) {
380 BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
381
382 dprintk("svc: releasing dead socket\n");
383 if (svsk->sk_sock->file)
384 sockfd_put(svsk->sk_sock);
385 else
386 sock_release(svsk->sk_sock);
387 if (svsk->sk_info_authunix != NULL)
388 svcauth_unix_info_release(svsk->sk_info_authunix);
389 kfree(svsk);
390 }
391}
392
393static void
394svc_sock_release(struct svc_rqst *rqstp)
395{
396 struct svc_sock *svsk = rqstp->rq_sock;
397
398 svc_release_skb(rqstp);
399
400 svc_free_res_pages(rqstp);
401 rqstp->rq_res.page_len = 0;
402 rqstp->rq_res.page_base = 0;
403
404
405 /* Reset response buffer and release
406 * the reservation.
407 * But first, check that enough space was reserved
408 * for the reply, otherwise we have a bug!
409 */
410 if ((rqstp->rq_res.len) > rqstp->rq_reserved)
411 printk(KERN_ERR "RPC request reserved %d but used %d\n",
412 rqstp->rq_reserved,
413 rqstp->rq_res.len);
414
415 rqstp->rq_res.head[0].iov_len = 0;
416 svc_reserve(rqstp, 0);
417 rqstp->rq_sock = NULL;
418
419 svc_sock_put(svsk);
420}
421
422/*
423 * External function to wake up a server waiting for data
424 * This really only makes sense for services like lockd
425 * which have exactly one thread anyway.
426 */
427void
428svc_wake_up(struct svc_serv *serv)
429{
430 struct svc_rqst *rqstp;
431 unsigned int i;
432 struct svc_pool *pool;
433
434 for (i = 0; i < serv->sv_nrpools; i++) {
435 pool = &serv->sv_pools[i];
436
437 spin_lock_bh(&pool->sp_lock);
438 if (!list_empty(&pool->sp_threads)) {
439 rqstp = list_entry(pool->sp_threads.next,
440 struct svc_rqst,
441 rq_list);
442 dprintk("svc: daemon %p woken up.\n", rqstp);
443 /*
444 svc_thread_dequeue(pool, rqstp);
445 rqstp->rq_sock = NULL;
446 */
447 wake_up(&rqstp->rq_wait);
448 }
449 spin_unlock_bh(&pool->sp_lock);
450 }
451}
452
453union svc_pktinfo_u { 119union svc_pktinfo_u {
454 struct in_pktinfo pkti; 120 struct in_pktinfo pkti;
455 struct in6_pktinfo pkti6; 121 struct in6_pktinfo pkti6;
@@ -459,7 +125,9 @@ union svc_pktinfo_u {
459 125
460static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) 126static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
461{ 127{
462 switch (rqstp->rq_sock->sk_sk->sk_family) { 128 struct svc_sock *svsk =
129 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
130 switch (svsk->sk_sk->sk_family) {
463 case AF_INET: { 131 case AF_INET: {
464 struct in_pktinfo *pki = CMSG_DATA(cmh); 132 struct in_pktinfo *pki = CMSG_DATA(cmh);
465 133
@@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
489/* 157/*
490 * Generic sendto routine 158 * Generic sendto routine
491 */ 159 */
492static int 160static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
493svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
494{ 161{
495 struct svc_sock *svsk = rqstp->rq_sock; 162 struct svc_sock *svsk =
163 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
496 struct socket *sock = svsk->sk_sock; 164 struct socket *sock = svsk->sk_sock;
497 int slen; 165 int slen;
498 union { 166 union {
@@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
565 } 233 }
566out: 234out:
567 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 235 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
568 rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, 236 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
569 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); 237 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
570 238
571 return len; 239 return len;
@@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
602 if (!serv) 270 if (!serv)
603 return 0; 271 return 0;
604 spin_lock_bh(&serv->sv_lock); 272 spin_lock_bh(&serv->sv_lock);
605 list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { 273 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
606 int onelen = one_sock_name(buf+len, svsk); 274 int onelen = one_sock_name(buf+len, svsk);
607 if (toclose && strcmp(toclose, buf+len) == 0) 275 if (toclose && strcmp(toclose, buf+len) == 0)
608 closesk = svsk; 276 closesk = svsk;
@@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
614 /* Should unregister with portmap, but you cannot 282 /* Should unregister with portmap, but you cannot
615 * unregister just one protocol... 283 * unregister just one protocol...
616 */ 284 */
617 svc_close_socket(closesk); 285 svc_close_xprt(&closesk->sk_xprt);
618 else if (toclose) 286 else if (toclose)
619 return -ENOENT; 287 return -ENOENT;
620 return len; 288 return len;
@@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names);
624/* 292/*
625 * Check input queue length 293 * Check input queue length
626 */ 294 */
627static int 295static int svc_recv_available(struct svc_sock *svsk)
628svc_recv_available(struct svc_sock *svsk)
629{ 296{
630 struct socket *sock = svsk->sk_sock; 297 struct socket *sock = svsk->sk_sock;
631 int avail, err; 298 int avail, err;
@@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk)
638/* 305/*
639 * Generic recvfrom routine. 306 * Generic recvfrom routine.
640 */ 307 */
641static int 308static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
642svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) 309 int buflen)
643{ 310{
644 struct svc_sock *svsk = rqstp->rq_sock; 311 struct svc_sock *svsk =
312 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
645 struct msghdr msg = { 313 struct msghdr msg = {
646 .msg_flags = MSG_DONTWAIT, 314 .msg_flags = MSG_DONTWAIT,
647 }; 315 };
648 struct sockaddr *sin;
649 int len; 316 int len;
650 317
318 rqstp->rq_xprt_hlen = 0;
319
651 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 320 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
652 msg.msg_flags); 321 msg.msg_flags);
653 322
654 /* sock_recvmsg doesn't fill in the name/namelen, so we must..
655 */
656 memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
657 rqstp->rq_addrlen = svsk->sk_remotelen;
658
659 /* Destination address in request is needed for binding the
660 * source address in RPC callbacks later.
661 */
662 sin = (struct sockaddr *)&svsk->sk_local;
663 switch (sin->sa_family) {
664 case AF_INET:
665 rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
666 break;
667 case AF_INET6:
668 rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
669 break;
670 }
671
672 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 323 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
673 svsk, iov[0].iov_base, iov[0].iov_len, len); 324 svsk, iov[0].iov_base, iov[0].iov_len, len);
674
675 return len; 325 return len;
676} 326}
677 327
678/* 328/*
679 * Set socket snd and rcv buffer lengths 329 * Set socket snd and rcv buffer lengths
680 */ 330 */
681static inline void 331static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
682svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) 332 unsigned int rcv)
683{ 333{
684#if 0 334#if 0
685 mm_segment_t oldfs; 335 mm_segment_t oldfs;
@@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
704/* 354/*
705 * INET callback when data has been received on the socket. 355 * INET callback when data has been received on the socket.
706 */ 356 */
707static void 357static void svc_udp_data_ready(struct sock *sk, int count)
708svc_udp_data_ready(struct sock *sk, int count)
709{ 358{
710 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 359 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
711 360
712 if (svsk) { 361 if (svsk) {
713 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 362 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
714 svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); 363 svsk, sk, count,
715 set_bit(SK_DATA, &svsk->sk_flags); 364 test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
716 svc_sock_enqueue(svsk); 365 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
366 svc_xprt_enqueue(&svsk->sk_xprt);
717 } 367 }
718 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 368 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
719 wake_up_interruptible(sk->sk_sleep); 369 wake_up_interruptible(sk->sk_sleep);
@@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count)
722/* 372/*
723 * INET callback when space is newly available on the socket. 373 * INET callback when space is newly available on the socket.
724 */ 374 */
725static void 375static void svc_write_space(struct sock *sk)
726svc_write_space(struct sock *sk)
727{ 376{
728 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 377 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
729 378
730 if (svsk) { 379 if (svsk) {
731 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 380 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
732 svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); 381 svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
733 svc_sock_enqueue(svsk); 382 svc_xprt_enqueue(&svsk->sk_xprt);
734 } 383 }
735 384
736 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { 385 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
@@ -740,10 +389,19 @@ svc_write_space(struct sock *sk)
740 } 389 }
741} 390}
742 391
743static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, 392/*
744 struct cmsghdr *cmh) 393 * Copy the UDP datagram's destination address to the rqstp structure.
394 * The 'destination' address in this case is the address to which the
395 * peer sent the datagram, i.e. our local address. For multihomed
396 * hosts, this can change from msg to msg. Note that only the IP
397 * address changes, the port number should remain the same.
398 */
399static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
400 struct cmsghdr *cmh)
745{ 401{
746 switch (rqstp->rq_sock->sk_sk->sk_family) { 402 struct svc_sock *svsk =
403 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
404 switch (svsk->sk_sk->sk_family) {
747 case AF_INET: { 405 case AF_INET: {
748 struct in_pktinfo *pki = CMSG_DATA(cmh); 406 struct in_pktinfo *pki = CMSG_DATA(cmh);
749 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 407 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
@@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
760/* 418/*
761 * Receive a datagram from a UDP socket. 419 * Receive a datagram from a UDP socket.
762 */ 420 */
763static int 421static int svc_udp_recvfrom(struct svc_rqst *rqstp)
764svc_udp_recvfrom(struct svc_rqst *rqstp)
765{ 422{
766 struct svc_sock *svsk = rqstp->rq_sock; 423 struct svc_sock *svsk =
767 struct svc_serv *serv = svsk->sk_server; 424 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
425 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
768 struct sk_buff *skb; 426 struct sk_buff *skb;
769 union { 427 union {
770 struct cmsghdr hdr; 428 struct cmsghdr hdr;
@@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
779 .msg_flags = MSG_DONTWAIT, 437 .msg_flags = MSG_DONTWAIT,
780 }; 438 };
781 439
782 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) 440 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
783 /* udp sockets need large rcvbuf as all pending 441 /* udp sockets need large rcvbuf as all pending
784 * requests are still in that buffer. sndbuf must 442 * requests are still in that buffer. sndbuf must
785 * also be large enough that there is enough space 443 * also be large enough that there is enough space
@@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
792 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 450 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
793 (serv->sv_nrthreads+3) * serv->sv_max_mesg); 451 (serv->sv_nrthreads+3) * serv->sv_max_mesg);
794 452
795 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { 453 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
796 svc_sock_received(svsk);
797 return svc_deferred_recv(rqstp);
798 }
799
800 if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
801 svc_delete_socket(svsk);
802 return 0;
803 }
804
805 clear_bit(SK_DATA, &svsk->sk_flags);
806 skb = NULL; 454 skb = NULL;
807 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 455 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
808 0, 0, MSG_PEEK | MSG_DONTWAIT); 456 0, 0, MSG_PEEK | MSG_DONTWAIT);
@@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
813 if (err != -EAGAIN) { 461 if (err != -EAGAIN) {
814 /* possibly an icmp error */ 462 /* possibly an icmp error */
815 dprintk("svc: recvfrom returned error %d\n", -err); 463 dprintk("svc: recvfrom returned error %d\n", -err);
816 set_bit(SK_DATA, &svsk->sk_flags); 464 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
817 } 465 }
818 svc_sock_received(svsk); 466 svc_xprt_received(&svsk->sk_xprt);
819 return -EAGAIN; 467 return -EAGAIN;
820 } 468 }
821 rqstp->rq_addrlen = sizeof(rqstp->rq_addr); 469 len = svc_addr_len(svc_addr(rqstp));
470 if (len < 0)
471 return len;
472 rqstp->rq_addrlen = len;
822 if (skb->tstamp.tv64 == 0) { 473 if (skb->tstamp.tv64 == 0) {
823 skb->tstamp = ktime_get_real(); 474 skb->tstamp = ktime_get_real();
824 /* Don't enable netstamp, sunrpc doesn't 475 /* Don't enable netstamp, sunrpc doesn't
825 need that much accuracy */ 476 need that much accuracy */
826 } 477 }
827 svsk->sk_sk->sk_stamp = skb->tstamp; 478 svsk->sk_sk->sk_stamp = skb->tstamp;
828 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ 479 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
829 480
830 /* 481 /*
831 * Maybe more packets - kick another thread ASAP. 482 * Maybe more packets - kick another thread ASAP.
832 */ 483 */
833 svc_sock_received(svsk); 484 svc_xprt_received(&svsk->sk_xprt);
834 485
835 len = skb->len - sizeof(struct udphdr); 486 len = skb->len - sizeof(struct udphdr);
836 rqstp->rq_arg.len = len; 487 rqstp->rq_arg.len = len;
@@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
861 skb_free_datagram(svsk->sk_sk, skb); 512 skb_free_datagram(svsk->sk_sk, skb);
862 } else { 513 } else {
863 /* we can use it in-place */ 514 /* we can use it in-place */
864 rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); 515 rqstp->rq_arg.head[0].iov_base = skb->data +
516 sizeof(struct udphdr);
865 rqstp->rq_arg.head[0].iov_len = len; 517 rqstp->rq_arg.head[0].iov_len = len;
866 if (skb_checksum_complete(skb)) { 518 if (skb_checksum_complete(skb)) {
867 skb_free_datagram(svsk->sk_sk, skb); 519 skb_free_datagram(svsk->sk_sk, skb);
868 return 0; 520 return 0;
869 } 521 }
870 rqstp->rq_skbuff = skb; 522 rqstp->rq_xprt_ctxt = skb;
871 } 523 }
872 524
873 rqstp->rq_arg.page_base = 0; 525 rqstp->rq_arg.page_base = 0;
@@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp)
900 return error; 552 return error;
901} 553}
902 554
903static void 555static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
904svc_udp_init(struct svc_sock *svsk) 556{
557}
558
559static int svc_udp_has_wspace(struct svc_xprt *xprt)
560{
561 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
562 struct svc_serv *serv = xprt->xpt_server;
563 unsigned long required;
564
565 /*
566 * Set the SOCK_NOSPACE flag before checking the available
567 * sock space.
568 */
569 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
570 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
571 if (required*2 > sock_wspace(svsk->sk_sk))
572 return 0;
573 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
574 return 1;
575}
576
577static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
578{
579 BUG();
580 return NULL;
581}
582
583static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
584 struct sockaddr *sa, int salen,
585 int flags)
586{
587 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
588}
589
590static struct svc_xprt_ops svc_udp_ops = {
591 .xpo_create = svc_udp_create,
592 .xpo_recvfrom = svc_udp_recvfrom,
593 .xpo_sendto = svc_udp_sendto,
594 .xpo_release_rqst = svc_release_skb,
595 .xpo_detach = svc_sock_detach,
596 .xpo_free = svc_sock_free,
597 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
598 .xpo_has_wspace = svc_udp_has_wspace,
599 .xpo_accept = svc_udp_accept,
600};
601
602static struct svc_xprt_class svc_udp_class = {
603 .xcl_name = "udp",
604 .xcl_owner = THIS_MODULE,
605 .xcl_ops = &svc_udp_ops,
606 .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
607};
608
609static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
905{ 610{
906 int one = 1; 611 int one = 1;
907 mm_segment_t oldfs; 612 mm_segment_t oldfs;
908 613
614 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
615 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
909 svsk->sk_sk->sk_data_ready = svc_udp_data_ready; 616 svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
910 svsk->sk_sk->sk_write_space = svc_write_space; 617 svsk->sk_sk->sk_write_space = svc_write_space;
911 svsk->sk_recvfrom = svc_udp_recvfrom;
912 svsk->sk_sendto = svc_udp_sendto;
913 618
914 /* initialise setting must have enough space to 619 /* initialise setting must have enough space to
915 * receive and respond to one request. 620 * receive and respond to one request.
916 * svc_udp_recvfrom will re-adjust if necessary 621 * svc_udp_recvfrom will re-adjust if necessary
917 */ 622 */
918 svc_sock_setbufsize(svsk->sk_sock, 623 svc_sock_setbufsize(svsk->sk_sock,
919 3 * svsk->sk_server->sv_max_mesg, 624 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
920 3 * svsk->sk_server->sv_max_mesg); 625 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
921 626
922 set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ 627 /* data might have come in before data_ready set up */
923 set_bit(SK_CHNGBUF, &svsk->sk_flags); 628 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
629 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
924 630
925 oldfs = get_fs(); 631 oldfs = get_fs();
926 set_fs(KERNEL_DS); 632 set_fs(KERNEL_DS);
@@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk)
934 * A data_ready event on a listening socket means there's a connection 640 * A data_ready event on a listening socket means there's a connection
935 * pending. Do not use state_change as a substitute for it. 641 * pending. Do not use state_change as a substitute for it.
936 */ 642 */
937static void 643static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
938svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
939{ 644{
940 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 645 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
941 646
@@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
954 */ 659 */
955 if (sk->sk_state == TCP_LISTEN) { 660 if (sk->sk_state == TCP_LISTEN) {
956 if (svsk) { 661 if (svsk) {
957 set_bit(SK_CONN, &svsk->sk_flags); 662 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
958 svc_sock_enqueue(svsk); 663 svc_xprt_enqueue(&svsk->sk_xprt);
959 } else 664 } else
960 printk("svc: socket %p: no user data\n", sk); 665 printk("svc: socket %p: no user data\n", sk);
961 } 666 }
@@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
967/* 672/*
968 * A state change on a connected socket means it's dying or dead. 673 * A state change on a connected socket means it's dying or dead.
969 */ 674 */
970static void 675static void svc_tcp_state_change(struct sock *sk)
971svc_tcp_state_change(struct sock *sk)
972{ 676{
973 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 677 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
974 678
@@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk)
978 if (!svsk) 682 if (!svsk)
979 printk("svc: socket %p: no user data\n", sk); 683 printk("svc: socket %p: no user data\n", sk);
980 else { 684 else {
981 set_bit(SK_CLOSE, &svsk->sk_flags); 685 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
982 svc_sock_enqueue(svsk); 686 svc_xprt_enqueue(&svsk->sk_xprt);
983 } 687 }
984 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
985 wake_up_interruptible_all(sk->sk_sleep); 689 wake_up_interruptible_all(sk->sk_sleep);
986} 690}
987 691
988static void 692static void svc_tcp_data_ready(struct sock *sk, int count)
989svc_tcp_data_ready(struct sock *sk, int count)
990{ 693{
991 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 694 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
992 695
993 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 696 dprintk("svc: socket %p TCP data ready (svsk %p)\n",
994 sk, sk->sk_user_data); 697 sk, sk->sk_user_data);
995 if (svsk) { 698 if (svsk) {
996 set_bit(SK_DATA, &svsk->sk_flags); 699 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
997 svc_sock_enqueue(svsk); 700 svc_xprt_enqueue(&svsk->sk_xprt);
998 } 701 }
999 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 702 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1000 wake_up_interruptible(sk->sk_sleep); 703 wake_up_interruptible(sk->sk_sleep);
1001} 704}
1002 705
1003static inline int svc_port_is_privileged(struct sockaddr *sin)
1004{
1005 switch (sin->sa_family) {
1006 case AF_INET:
1007 return ntohs(((struct sockaddr_in *)sin)->sin_port)
1008 < PROT_SOCK;
1009 case AF_INET6:
1010 return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
1011 < PROT_SOCK;
1012 default:
1013 return 0;
1014 }
1015}
1016
1017/* 706/*
1018 * Accept a TCP connection 707 * Accept a TCP connection
1019 */ 708 */
1020static void 709static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
1021svc_tcp_accept(struct svc_sock *svsk)
1022{ 710{
711 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1023 struct sockaddr_storage addr; 712 struct sockaddr_storage addr;
1024 struct sockaddr *sin = (struct sockaddr *) &addr; 713 struct sockaddr *sin = (struct sockaddr *) &addr;
1025 struct svc_serv *serv = svsk->sk_server; 714 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1026 struct socket *sock = svsk->sk_sock; 715 struct socket *sock = svsk->sk_sock;
1027 struct socket *newsock; 716 struct socket *newsock;
1028 struct svc_sock *newsvsk; 717 struct svc_sock *newsvsk;
@@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk)
1031 720
1032 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); 721 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
1033 if (!sock) 722 if (!sock)
1034 return; 723 return NULL;
1035 724
1036 clear_bit(SK_CONN, &svsk->sk_flags); 725 clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1037 err = kernel_accept(sock, &newsock, O_NONBLOCK); 726 err = kernel_accept(sock, &newsock, O_NONBLOCK);
1038 if (err < 0) { 727 if (err < 0) {
1039 if (err == -ENOMEM) 728 if (err == -ENOMEM)
@@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk)
1042 else if (err != -EAGAIN && net_ratelimit()) 731 else if (err != -EAGAIN && net_ratelimit())
1043 printk(KERN_WARNING "%s: accept failed (err %d)!\n", 732 printk(KERN_WARNING "%s: accept failed (err %d)!\n",
1044 serv->sv_name, -err); 733 serv->sv_name, -err);
1045 return; 734 return NULL;
1046 } 735 }
1047 736 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1048 set_bit(SK_CONN, &svsk->sk_flags);
1049 svc_sock_enqueue(svsk);
1050 737
1051 err = kernel_getpeername(newsock, sin, &slen); 738 err = kernel_getpeername(newsock, sin, &slen);
1052 if (err < 0) { 739 if (err < 0) {
@@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk)
1077 if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 764 if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
1078 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) 765 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
1079 goto failed; 766 goto failed;
1080 memcpy(&newsvsk->sk_remote, sin, slen); 767 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
1081 newsvsk->sk_remotelen = slen;
1082 err = kernel_getsockname(newsock, sin, &slen); 768 err = kernel_getsockname(newsock, sin, &slen);
1083 if (unlikely(err < 0)) { 769 if (unlikely(err < 0)) {
1084 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); 770 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
1085 slen = offsetof(struct sockaddr, sa_data); 771 slen = offsetof(struct sockaddr, sa_data);
1086 } 772 }
1087 memcpy(&newsvsk->sk_local, sin, slen); 773 svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
1088
1089 svc_sock_received(newsvsk);
1090
1091 /* make sure that we don't have too many active connections.
1092 * If we have, something must be dropped.
1093 *
1094 * There's no point in trying to do random drop here for
1095 * DoS prevention. The NFS clients does 1 reconnect in 15
1096 * seconds. An attacker can easily beat that.
1097 *
1098 * The only somewhat efficient mechanism would be if drop
1099 * old connections from the same IP first. But right now
1100 * we don't even record the client IP in svc_sock.
1101 */
1102 if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
1103 struct svc_sock *svsk = NULL;
1104 spin_lock_bh(&serv->sv_lock);
1105 if (!list_empty(&serv->sv_tempsocks)) {
1106 if (net_ratelimit()) {
1107 /* Try to help the admin */
1108 printk(KERN_NOTICE "%s: too many open TCP "
1109 "sockets, consider increasing the "
1110 "number of nfsd threads\n",
1111 serv->sv_name);
1112 printk(KERN_NOTICE
1113 "%s: last TCP connect from %s\n",
1114 serv->sv_name, __svc_print_addr(sin,
1115 buf, sizeof(buf)));
1116 }
1117 /*
1118 * Always select the oldest socket. It's not fair,
1119 * but so is life
1120 */
1121 svsk = list_entry(serv->sv_tempsocks.prev,
1122 struct svc_sock,
1123 sk_list);
1124 set_bit(SK_CLOSE, &svsk->sk_flags);
1125 atomic_inc(&svsk->sk_inuse);
1126 }
1127 spin_unlock_bh(&serv->sv_lock);
1128
1129 if (svsk) {
1130 svc_sock_enqueue(svsk);
1131 svc_sock_put(svsk);
1132 }
1133
1134 }
1135 774
1136 if (serv->sv_stats) 775 if (serv->sv_stats)
1137 serv->sv_stats->nettcpconn++; 776 serv->sv_stats->nettcpconn++;
1138 777
1139 return; 778 return &newsvsk->sk_xprt;
1140 779
1141failed: 780failed:
1142 sock_release(newsock); 781 sock_release(newsock);
1143 return; 782 return NULL;
1144} 783}
1145 784
1146/* 785/*
1147 * Receive data from a TCP socket. 786 * Receive data from a TCP socket.
1148 */ 787 */
1149static int 788static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1150svc_tcp_recvfrom(struct svc_rqst *rqstp)
1151{ 789{
1152 struct svc_sock *svsk = rqstp->rq_sock; 790 struct svc_sock *svsk =
1153 struct svc_serv *serv = svsk->sk_server; 791 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
792 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1154 int len; 793 int len;
1155 struct kvec *vec; 794 struct kvec *vec;
1156 int pnum, vlen; 795 int pnum, vlen;
1157 796
1158 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 797 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1159 svsk, test_bit(SK_DATA, &svsk->sk_flags), 798 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
1160 test_bit(SK_CONN, &svsk->sk_flags), 799 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
1161 test_bit(SK_CLOSE, &svsk->sk_flags)); 800 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1162 801
1163 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { 802 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
1164 svc_sock_received(svsk);
1165 return svc_deferred_recv(rqstp);
1166 }
1167
1168 if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
1169 svc_delete_socket(svsk);
1170 return 0;
1171 }
1172
1173 if (svsk->sk_sk->sk_state == TCP_LISTEN) {
1174 svc_tcp_accept(svsk);
1175 svc_sock_received(svsk);
1176 return 0;
1177 }
1178
1179 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
1180 /* sndbuf needs to have room for one request 803 /* sndbuf needs to have room for one request
1181 * per thread, otherwise we can stall even when the 804 * per thread, otherwise we can stall even when the
1182 * network isn't a bottleneck. 805 * network isn't a bottleneck.
@@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1193 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 816 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
1194 3 * serv->sv_max_mesg); 817 3 * serv->sv_max_mesg);
1195 818
1196 clear_bit(SK_DATA, &svsk->sk_flags); 819 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1197 820
1198 /* Receive data. If we haven't got the record length yet, get 821 /* Receive data. If we haven't got the record length yet, get
1199 * the next four bytes. Otherwise try to gobble up as much as 822 * the next four bytes. Otherwise try to gobble up as much as
@@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1212 if (len < want) { 835 if (len < want) {
1213 dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", 836 dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
1214 len, want); 837 len, want);
1215 svc_sock_received(svsk); 838 svc_xprt_received(&svsk->sk_xprt);
1216 return -EAGAIN; /* record header not complete */ 839 return -EAGAIN; /* record header not complete */
1217 } 840 }
1218 841
@@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1248 if (len < svsk->sk_reclen) { 871 if (len < svsk->sk_reclen) {
1249 dprintk("svc: incomplete TCP record (%d of %d)\n", 872 dprintk("svc: incomplete TCP record (%d of %d)\n",
1250 len, svsk->sk_reclen); 873 len, svsk->sk_reclen);
1251 svc_sock_received(svsk); 874 svc_xprt_received(&svsk->sk_xprt);
1252 return -EAGAIN; /* record not complete */ 875 return -EAGAIN; /* record not complete */
1253 } 876 }
1254 len = svsk->sk_reclen; 877 len = svsk->sk_reclen;
1255 set_bit(SK_DATA, &svsk->sk_flags); 878 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1256 879
1257 vec = rqstp->rq_vec; 880 vec = rqstp->rq_vec;
1258 vec[0] = rqstp->rq_arg.head[0]; 881 vec[0] = rqstp->rq_arg.head[0];
@@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1281 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 904 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
1282 } 905 }
1283 906
1284 rqstp->rq_skbuff = NULL; 907 rqstp->rq_xprt_ctxt = NULL;
1285 rqstp->rq_prot = IPPROTO_TCP; 908 rqstp->rq_prot = IPPROTO_TCP;
1286 909
1287 /* Reset TCP read info */ 910 /* Reset TCP read info */
1288 svsk->sk_reclen = 0; 911 svsk->sk_reclen = 0;
1289 svsk->sk_tcplen = 0; 912 svsk->sk_tcplen = 0;
1290 913
1291 svc_sock_received(svsk); 914 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
915 svc_xprt_received(&svsk->sk_xprt);
1292 if (serv->sv_stats) 916 if (serv->sv_stats)
1293 serv->sv_stats->nettcpcnt++; 917 serv->sv_stats->nettcpcnt++;
1294 918
1295 return len; 919 return len;
1296 920
1297 err_delete: 921 err_delete:
1298 svc_delete_socket(svsk); 922 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1299 return -EAGAIN; 923 return -EAGAIN;
1300 924
1301 error: 925 error:
1302 if (len == -EAGAIN) { 926 if (len == -EAGAIN) {
1303 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 927 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1304 svc_sock_received(svsk); 928 svc_xprt_received(&svsk->sk_xprt);
1305 } else { 929 } else {
1306 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 930 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1307 svsk->sk_server->sv_name, -len); 931 svsk->sk_xprt.xpt_server->sv_name, -len);
1308 goto err_delete; 932 goto err_delete;
1309 } 933 }
1310 934
@@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1314/* 938/*
1315 * Send out data on TCP socket. 939 * Send out data on TCP socket.
1316 */ 940 */
1317static int 941static int svc_tcp_sendto(struct svc_rqst *rqstp)
1318svc_tcp_sendto(struct svc_rqst *rqstp)
1319{ 942{
1320 struct xdr_buf *xbufp = &rqstp->rq_res; 943 struct xdr_buf *xbufp = &rqstp->rq_res;
1321 int sent; 944 int sent;
@@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
1328 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 951 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1329 memcpy(xbufp->head[0].iov_base, &reclen, 4); 952 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1330 953
1331 if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) 954 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1332 return -ENOTCONN; 955 return -ENOTCONN;
1333 956
1334 sent = svc_sendto(rqstp, &rqstp->rq_res); 957 sent = svc_sendto(rqstp, &rqstp->rq_res);
1335 if (sent != xbufp->len) { 958 if (sent != xbufp->len) {
1336 printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", 959 printk(KERN_NOTICE
1337 rqstp->rq_sock->sk_server->sv_name, 960 "rpc-srv/tcp: %s: %s %d when sending %d bytes "
961 "- shutting down socket\n",
962 rqstp->rq_xprt->xpt_server->sv_name,
1338 (sent<0)?"got error":"sent only", 963 (sent<0)?"got error":"sent only",
1339 sent, xbufp->len); 964 sent, xbufp->len);
1340 set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); 965 set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
1341 svc_sock_enqueue(rqstp->rq_sock); 966 svc_xprt_enqueue(rqstp->rq_xprt);
1342 sent = -EAGAIN; 967 sent = -EAGAIN;
1343 } 968 }
1344 return sent; 969 return sent;
1345} 970}
1346 971
1347static void 972/*
1348svc_tcp_init(struct svc_sock *svsk) 973 * Setup response header. TCP has a 4B record length field.
974 */
975static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
976{
977 struct kvec *resv = &rqstp->rq_res.head[0];
978
979 /* tcp needs a space for the record length... */
980 svc_putnl(resv, 0);
981}
982
983static int svc_tcp_has_wspace(struct svc_xprt *xprt)
984{
985 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
986 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
987 int required;
988 int wspace;
989
990 /*
991 * Set the SOCK_NOSPACE flag before checking the available
992 * sock space.
993 */
994 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
995 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
996 wspace = sk_stream_wspace(svsk->sk_sk);
997
998 if (wspace < sk_stream_min_wspace(svsk->sk_sk))
999 return 0;
1000 if (required * 2 > wspace)
1001 return 0;
1002
1003 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
1004 return 1;
1005}
1006
1007static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1008 struct sockaddr *sa, int salen,
1009 int flags)
1010{
1011 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
1012}
1013
1014static struct svc_xprt_ops svc_tcp_ops = {
1015 .xpo_create = svc_tcp_create,
1016 .xpo_recvfrom = svc_tcp_recvfrom,
1017 .xpo_sendto = svc_tcp_sendto,
1018 .xpo_release_rqst = svc_release_skb,
1019 .xpo_detach = svc_sock_detach,
1020 .xpo_free = svc_sock_free,
1021 .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
1022 .xpo_has_wspace = svc_tcp_has_wspace,
1023 .xpo_accept = svc_tcp_accept,
1024};
1025
1026static struct svc_xprt_class svc_tcp_class = {
1027 .xcl_name = "tcp",
1028 .xcl_owner = THIS_MODULE,
1029 .xcl_ops = &svc_tcp_ops,
1030 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
1031};
1032
1033void svc_init_xprt_sock(void)
1034{
1035 svc_reg_xprt_class(&svc_tcp_class);
1036 svc_reg_xprt_class(&svc_udp_class);
1037}
1038
1039void svc_cleanup_xprt_sock(void)
1040{
1041 svc_unreg_xprt_class(&svc_tcp_class);
1042 svc_unreg_xprt_class(&svc_udp_class);
1043}
1044
1045static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1349{ 1046{
1350 struct sock *sk = svsk->sk_sk; 1047 struct sock *sk = svsk->sk_sk;
1351 struct tcp_sock *tp = tcp_sk(sk); 1048 struct tcp_sock *tp = tcp_sk(sk);
1352 1049
1353 svsk->sk_recvfrom = svc_tcp_recvfrom; 1050 svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
1354 svsk->sk_sendto = svc_tcp_sendto; 1051 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
1355
1356 if (sk->sk_state == TCP_LISTEN) { 1052 if (sk->sk_state == TCP_LISTEN) {
1357 dprintk("setting up TCP socket for listening\n"); 1053 dprintk("setting up TCP socket for listening\n");
1054 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
1358 sk->sk_data_ready = svc_tcp_listen_data_ready; 1055 sk->sk_data_ready = svc_tcp_listen_data_ready;
1359 set_bit(SK_CONN, &svsk->sk_flags); 1056 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1360 } else { 1057 } else {
1361 dprintk("setting up TCP socket for reading\n"); 1058 dprintk("setting up TCP socket for reading\n");
1362 sk->sk_state_change = svc_tcp_state_change; 1059 sk->sk_state_change = svc_tcp_state_change;
@@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk)
1373 * svc_tcp_recvfrom will re-adjust if necessary 1070 * svc_tcp_recvfrom will re-adjust if necessary
1374 */ 1071 */
1375 svc_sock_setbufsize(svsk->sk_sock, 1072 svc_sock_setbufsize(svsk->sk_sock,
1376 3 * svsk->sk_server->sv_max_mesg, 1073 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1377 3 * svsk->sk_server->sv_max_mesg); 1074 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1378 1075
1379 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1076 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1380 set_bit(SK_DATA, &svsk->sk_flags); 1077 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1381 if (sk->sk_state != TCP_ESTABLISHED) 1078 if (sk->sk_state != TCP_ESTABLISHED)
1382 set_bit(SK_CLOSE, &svsk->sk_flags); 1079 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1383 } 1080 }
1384} 1081}
1385 1082
1386void 1083void svc_sock_update_bufs(struct svc_serv *serv)
1387svc_sock_update_bufs(struct svc_serv *serv)
1388{ 1084{
1389 /* 1085 /*
1390 * The number of server threads has changed. Update 1086 * The number of server threads has changed. Update
@@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv)
1395 spin_lock_bh(&serv->sv_lock); 1091 spin_lock_bh(&serv->sv_lock);
1396 list_for_each(le, &serv->sv_permsocks) { 1092 list_for_each(le, &serv->sv_permsocks) {
1397 struct svc_sock *svsk = 1093 struct svc_sock *svsk =
1398 list_entry(le, struct svc_sock, sk_list); 1094 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1399 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1095 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1400 } 1096 }
1401 list_for_each(le, &serv->sv_tempsocks) { 1097 list_for_each(le, &serv->sv_tempsocks) {
1402 struct svc_sock *svsk = 1098 struct svc_sock *svsk =
1403 list_entry(le, struct svc_sock, sk_list); 1099 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1404 set_bit(SK_CHNGBUF, &svsk->sk_flags); 1100 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1405 } 1101 }
1406 spin_unlock_bh(&serv->sv_lock); 1102 spin_unlock_bh(&serv->sv_lock);
1407} 1103}
1408 1104
1409/* 1105/*
1410 * Receive the next request on any socket. This code is carefully
1411 * organised not to touch any cachelines in the shared svc_serv
1412 * structure, only cachelines in the local svc_pool.
1413 */
1414int
1415svc_recv(struct svc_rqst *rqstp, long timeout)
1416{
1417 struct svc_sock *svsk = NULL;
1418 struct svc_serv *serv = rqstp->rq_server;
1419 struct svc_pool *pool = rqstp->rq_pool;
1420 int len, i;
1421 int pages;
1422 struct xdr_buf *arg;
1423 DECLARE_WAITQUEUE(wait, current);
1424
1425 dprintk("svc: server %p waiting for data (to = %ld)\n",
1426 rqstp, timeout);
1427
1428 if (rqstp->rq_sock)
1429 printk(KERN_ERR
1430 "svc_recv: service %p, socket not NULL!\n",
1431 rqstp);
1432 if (waitqueue_active(&rqstp->rq_wait))
1433 printk(KERN_ERR
1434 "svc_recv: service %p, wait queue active!\n",
1435 rqstp);
1436
1437
1438 /* now allocate needed pages. If we get a failure, sleep briefly */
1439 pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
1440 for (i=0; i < pages ; i++)
1441 while (rqstp->rq_pages[i] == NULL) {
1442 struct page *p = alloc_page(GFP_KERNEL);
1443 if (!p)
1444 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
1445 rqstp->rq_pages[i] = p;
1446 }
1447 rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
1448 BUG_ON(pages >= RPCSVC_MAXPAGES);
1449
1450 /* Make arg->head point to first page and arg->pages point to rest */
1451 arg = &rqstp->rq_arg;
1452 arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
1453 arg->head[0].iov_len = PAGE_SIZE;
1454 arg->pages = rqstp->rq_pages + 1;
1455 arg->page_base = 0;
1456 /* save at least one page for response */
1457 arg->page_len = (pages-2)*PAGE_SIZE;
1458 arg->len = (pages-1)*PAGE_SIZE;
1459 arg->tail[0].iov_len = 0;
1460
1461 try_to_freeze();
1462 cond_resched();
1463 if (signalled())
1464 return -EINTR;
1465
1466 spin_lock_bh(&pool->sp_lock);
1467 if ((svsk = svc_sock_dequeue(pool)) != NULL) {
1468 rqstp->rq_sock = svsk;
1469 atomic_inc(&svsk->sk_inuse);
1470 rqstp->rq_reserved = serv->sv_max_mesg;
1471 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
1472 } else {
1473 /* No data pending. Go to sleep */
1474 svc_thread_enqueue(pool, rqstp);
1475
1476 /*
1477 * We have to be able to interrupt this wait
1478 * to bring down the daemons ...
1479 */
1480 set_current_state(TASK_INTERRUPTIBLE);
1481 add_wait_queue(&rqstp->rq_wait, &wait);
1482 spin_unlock_bh(&pool->sp_lock);
1483
1484 schedule_timeout(timeout);
1485
1486 try_to_freeze();
1487
1488 spin_lock_bh(&pool->sp_lock);
1489 remove_wait_queue(&rqstp->rq_wait, &wait);
1490
1491 if (!(svsk = rqstp->rq_sock)) {
1492 svc_thread_dequeue(pool, rqstp);
1493 spin_unlock_bh(&pool->sp_lock);
1494 dprintk("svc: server %p, no data yet\n", rqstp);
1495 return signalled()? -EINTR : -EAGAIN;
1496 }
1497 }
1498 spin_unlock_bh(&pool->sp_lock);
1499
1500 dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
1501 rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
1502 len = svsk->sk_recvfrom(rqstp);
1503 dprintk("svc: got len=%d\n", len);
1504
1505 /* No data, incomplete (TCP) read, or accept() */
1506 if (len == 0 || len == -EAGAIN) {
1507 rqstp->rq_res.len = 0;
1508 svc_sock_release(rqstp);
1509 return -EAGAIN;
1510 }
1511 svsk->sk_lastrecv = get_seconds();
1512 clear_bit(SK_OLD, &svsk->sk_flags);
1513
1514 rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
1515 rqstp->rq_chandle.defer = svc_defer;
1516
1517 if (serv->sv_stats)
1518 serv->sv_stats->netcnt++;
1519 return len;
1520}
1521
1522/*
1523 * Drop request
1524 */
1525void
1526svc_drop(struct svc_rqst *rqstp)
1527{
1528 dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
1529 svc_sock_release(rqstp);
1530}
1531
1532/*
1533 * Return reply to client.
1534 */
1535int
1536svc_send(struct svc_rqst *rqstp)
1537{
1538 struct svc_sock *svsk;
1539 int len;
1540 struct xdr_buf *xb;
1541
1542 if ((svsk = rqstp->rq_sock) == NULL) {
1543 printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
1544 __FILE__, __LINE__);
1545 return -EFAULT;
1546 }
1547
1548 /* release the receive skb before sending the reply */
1549 svc_release_skb(rqstp);
1550
1551 /* calculate over-all length */
1552 xb = & rqstp->rq_res;
1553 xb->len = xb->head[0].iov_len +
1554 xb->page_len +
1555 xb->tail[0].iov_len;
1556
1557 /* Grab svsk->sk_mutex to serialize outgoing data. */
1558 mutex_lock(&svsk->sk_mutex);
1559 if (test_bit(SK_DEAD, &svsk->sk_flags))
1560 len = -ENOTCONN;
1561 else
1562 len = svsk->sk_sendto(rqstp);
1563 mutex_unlock(&svsk->sk_mutex);
1564 svc_sock_release(rqstp);
1565
1566 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
1567 return 0;
1568 return len;
1569}
1570
1571/*
1572 * Timer function to close old temporary sockets, using
1573 * a mark-and-sweep algorithm.
1574 */
1575static void
1576svc_age_temp_sockets(unsigned long closure)
1577{
1578 struct svc_serv *serv = (struct svc_serv *)closure;
1579 struct svc_sock *svsk;
1580 struct list_head *le, *next;
1581 LIST_HEAD(to_be_aged);
1582
1583 dprintk("svc_age_temp_sockets\n");
1584
1585 if (!spin_trylock_bh(&serv->sv_lock)) {
1586 /* busy, try again 1 sec later */
1587 dprintk("svc_age_temp_sockets: busy\n");
1588 mod_timer(&serv->sv_temptimer, jiffies + HZ);
1589 return;
1590 }
1591
1592 list_for_each_safe(le, next, &serv->sv_tempsocks) {
1593 svsk = list_entry(le, struct svc_sock, sk_list);
1594
1595 if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
1596 continue;
1597 if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
1598 continue;
1599 atomic_inc(&svsk->sk_inuse);
1600 list_move(le, &to_be_aged);
1601 set_bit(SK_CLOSE, &svsk->sk_flags);
1602 set_bit(SK_DETACHED, &svsk->sk_flags);
1603 }
1604 spin_unlock_bh(&serv->sv_lock);
1605
1606 while (!list_empty(&to_be_aged)) {
1607 le = to_be_aged.next;
1608 /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
1609 list_del_init(le);
1610 svsk = list_entry(le, struct svc_sock, sk_list);
1611
1612 dprintk("queuing svsk %p for closing, %lu seconds old\n",
1613 svsk, get_seconds() - svsk->sk_lastrecv);
1614
1615 /* a thread will dequeue and close it soon */
1616 svc_sock_enqueue(svsk);
1617 svc_sock_put(svsk);
1618 }
1619
1620 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
1621}
1622
1623/*
1624 * Initialize socket for RPC use and create svc_sock struct 1106 * Initialize socket for RPC use and create svc_sock struct
1625 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. 1107 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1626 */ 1108 */
@@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1631 struct svc_sock *svsk; 1113 struct svc_sock *svsk;
1632 struct sock *inet; 1114 struct sock *inet;
1633 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1115 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1634 int is_temporary = flags & SVC_SOCK_TEMPORARY;
1635 1116
1636 dprintk("svc: svc_setup_socket %p\n", sock); 1117 dprintk("svc: svc_setup_socket %p\n", sock);
1637 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1118 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1651 return NULL; 1132 return NULL;
1652 } 1133 }
1653 1134
1654 set_bit(SK_BUSY, &svsk->sk_flags);
1655 inet->sk_user_data = svsk; 1135 inet->sk_user_data = svsk;
1656 svsk->sk_sock = sock; 1136 svsk->sk_sock = sock;
1657 svsk->sk_sk = inet; 1137 svsk->sk_sk = inet;
1658 svsk->sk_ostate = inet->sk_state_change; 1138 svsk->sk_ostate = inet->sk_state_change;
1659 svsk->sk_odata = inet->sk_data_ready; 1139 svsk->sk_odata = inet->sk_data_ready;
1660 svsk->sk_owspace = inet->sk_write_space; 1140 svsk->sk_owspace = inet->sk_write_space;
1661 svsk->sk_server = serv;
1662 atomic_set(&svsk->sk_inuse, 1);
1663 svsk->sk_lastrecv = get_seconds();
1664 spin_lock_init(&svsk->sk_lock);
1665 INIT_LIST_HEAD(&svsk->sk_deferred);
1666 INIT_LIST_HEAD(&svsk->sk_ready);
1667 mutex_init(&svsk->sk_mutex);
1668 1141
1669 /* Initialize the socket */ 1142 /* Initialize the socket */
1670 if (sock->type == SOCK_DGRAM) 1143 if (sock->type == SOCK_DGRAM)
1671 svc_udp_init(svsk); 1144 svc_udp_init(svsk, serv);
1672 else 1145 else
1673 svc_tcp_init(svsk); 1146 svc_tcp_init(svsk, serv);
1674
1675 spin_lock_bh(&serv->sv_lock);
1676 if (is_temporary) {
1677 set_bit(SK_TEMP, &svsk->sk_flags);
1678 list_add(&svsk->sk_list, &serv->sv_tempsocks);
1679 serv->sv_tmpcnt++;
1680 if (serv->sv_temptimer.function == NULL) {
1681 /* setup timer to age temp sockets */
1682 setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
1683 (unsigned long)serv);
1684 mod_timer(&serv->sv_temptimer,
1685 jiffies + svc_conn_age_period * HZ);
1686 }
1687 } else {
1688 clear_bit(SK_TEMP, &svsk->sk_flags);
1689 list_add(&svsk->sk_list, &serv->sv_permsocks);
1690 }
1691 spin_unlock_bh(&serv->sv_lock);
1692 1147
1693 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1148 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1694 svsk, svsk->sk_sk); 1149 svsk, svsk->sk_sk);
@@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv,
1717 else { 1172 else {
1718 svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); 1173 svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
1719 if (svsk) { 1174 if (svsk) {
1720 svc_sock_received(svsk); 1175 struct sockaddr_storage addr;
1176 struct sockaddr *sin = (struct sockaddr *)&addr;
1177 int salen;
1178 if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
1179 svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
1180 clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
1181 spin_lock_bh(&serv->sv_lock);
1182 list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
1183 spin_unlock_bh(&serv->sv_lock);
1184 svc_xprt_received(&svsk->sk_xprt);
1721 err = 0; 1185 err = 0;
1722 } 1186 }
1723 } 1187 }
@@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1733/* 1197/*
1734 * Create socket for RPC service. 1198 * Create socket for RPC service.
1735 */ 1199 */
1736static int svc_create_socket(struct svc_serv *serv, int protocol, 1200static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1737 struct sockaddr *sin, int len, int flags) 1201 int protocol,
1202 struct sockaddr *sin, int len,
1203 int flags)
1738{ 1204{
1739 struct svc_sock *svsk; 1205 struct svc_sock *svsk;
1740 struct socket *sock; 1206 struct socket *sock;
1741 int error; 1207 int error;
1742 int type; 1208 int type;
1743 char buf[RPC_MAX_ADDRBUFLEN]; 1209 char buf[RPC_MAX_ADDRBUFLEN];
1210 struct sockaddr_storage addr;
1211 struct sockaddr *newsin = (struct sockaddr *)&addr;
1212 int newlen;
1744 1213
1745 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1214 dprintk("svc: svc_create_socket(%s, %d, %s)\n",
1746 serv->sv_program->pg_name, protocol, 1215 serv->sv_program->pg_name, protocol,
@@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
1749 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { 1218 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1750 printk(KERN_WARNING "svc: only UDP and TCP " 1219 printk(KERN_WARNING "svc: only UDP and TCP "
1751 "sockets supported\n"); 1220 "sockets supported\n");
1752 return -EINVAL; 1221 return ERR_PTR(-EINVAL);
1753 } 1222 }
1754 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1223 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1755 1224
1756 error = sock_create_kern(sin->sa_family, type, protocol, &sock); 1225 error = sock_create_kern(sin->sa_family, type, protocol, &sock);
1757 if (error < 0) 1226 if (error < 0)
1758 return error; 1227 return ERR_PTR(error);
1759 1228
1760 svc_reclassify_socket(sock); 1229 svc_reclassify_socket(sock);
1761 1230
@@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
1765 if (error < 0) 1234 if (error < 0)
1766 goto bummer; 1235 goto bummer;
1767 1236
1237 newlen = len;
1238 error = kernel_getsockname(sock, newsin, &newlen);
1239 if (error < 0)
1240 goto bummer;
1241
1768 if (protocol == IPPROTO_TCP) { 1242 if (protocol == IPPROTO_TCP) {
1769 if ((error = kernel_listen(sock, 64)) < 0) 1243 if ((error = kernel_listen(sock, 64)) < 0)
1770 goto bummer; 1244 goto bummer;
1771 } 1245 }
1772 1246
1773 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { 1247 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
1774 svc_sock_received(svsk); 1248 svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
1775 return ntohs(inet_sk(svsk->sk_sk)->sport); 1249 return (struct svc_xprt *)svsk;
1776 } 1250 }
1777 1251
1778bummer: 1252bummer:
1779 dprintk("svc: svc_create_socket error = %d\n", -error); 1253 dprintk("svc: svc_create_socket error = %d\n", -error);
1780 sock_release(sock); 1254 sock_release(sock);
1781 return error; 1255 return ERR_PTR(error);
1782} 1256}
1783 1257
1784/* 1258/*
1785 * Remove a dead socket 1259 * Detach the svc_sock from the socket so that no
1260 * more callbacks occur.
1786 */ 1261 */
1787static void 1262static void svc_sock_detach(struct svc_xprt *xprt)
1788svc_delete_socket(struct svc_sock *svsk)
1789{ 1263{
1790 struct svc_serv *serv; 1264 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1791 struct sock *sk; 1265 struct sock *sk = svsk->sk_sk;
1792
1793 dprintk("svc: svc_delete_socket(%p)\n", svsk);
1794 1266
1795 serv = svsk->sk_server; 1267 dprintk("svc: svc_sock_detach(%p)\n", svsk);
1796 sk = svsk->sk_sk;
1797 1268
1269 /* put back the old socket callbacks */
1798 sk->sk_state_change = svsk->sk_ostate; 1270 sk->sk_state_change = svsk->sk_ostate;
1799 sk->sk_data_ready = svsk->sk_odata; 1271 sk->sk_data_ready = svsk->sk_odata;
1800 sk->sk_write_space = svsk->sk_owspace; 1272 sk->sk_write_space = svsk->sk_owspace;
1801
1802 spin_lock_bh(&serv->sv_lock);
1803
1804 if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
1805 list_del_init(&svsk->sk_list);
1806 /*
1807 * We used to delete the svc_sock from whichever list
1808 * it's sk_ready node was on, but we don't actually
1809 * need to. This is because the only time we're called
1810 * while still attached to a queue, the queue itself
1811 * is about to be destroyed (in svc_destroy).
1812 */
1813 if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
1814 BUG_ON(atomic_read(&svsk->sk_inuse)<2);
1815 atomic_dec(&svsk->sk_inuse);
1816 if (test_bit(SK_TEMP, &svsk->sk_flags))
1817 serv->sv_tmpcnt--;
1818 }
1819
1820 spin_unlock_bh(&serv->sv_lock);
1821}
1822
1823static void svc_close_socket(struct svc_sock *svsk)
1824{
1825 set_bit(SK_CLOSE, &svsk->sk_flags);
1826 if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
1827 /* someone else will have to effect the close */
1828 return;
1829
1830 atomic_inc(&svsk->sk_inuse);
1831 svc_delete_socket(svsk);
1832 clear_bit(SK_BUSY, &svsk->sk_flags);
1833 svc_sock_put(svsk);
1834}
1835
1836void svc_force_close_socket(struct svc_sock *svsk)
1837{
1838 set_bit(SK_CLOSE, &svsk->sk_flags);
1839 if (test_bit(SK_BUSY, &svsk->sk_flags)) {
1840 /* Waiting to be processed, but no threads left,
1841 * So just remove it from the waiting list
1842 */
1843 list_del_init(&svsk->sk_ready);
1844 clear_bit(SK_BUSY, &svsk->sk_flags);
1845 }
1846 svc_close_socket(svsk);
1847}
1848
1849/**
1850 * svc_makesock - Make a socket for nfsd and lockd
1851 * @serv: RPC server structure
1852 * @protocol: transport protocol to use
1853 * @port: port to use
1854 * @flags: requested socket characteristics
1855 *
1856 */
1857int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
1858 int flags)
1859{
1860 struct sockaddr_in sin = {
1861 .sin_family = AF_INET,
1862 .sin_addr.s_addr = INADDR_ANY,
1863 .sin_port = htons(port),
1864 };
1865
1866 dprintk("svc: creating socket proto = %d\n", protocol);
1867 return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
1868 sizeof(sin), flags);
1869} 1273}
1870 1274
1871/* 1275/*
1872 * Handle defer and revisit of requests 1276 * Free the svc_sock's socket resources and the svc_sock itself.
1873 */ 1277 */
1874 1278static void svc_sock_free(struct svc_xprt *xprt)
1875static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
1876{ 1279{
1877 struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); 1280 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1878 struct svc_sock *svsk; 1281 dprintk("svc: svc_sock_free(%p)\n", svsk);
1879 1282
1880 if (too_many) { 1283 if (svsk->sk_sock->file)
1881 svc_sock_put(dr->svsk); 1284 sockfd_put(svsk->sk_sock);
1882 kfree(dr); 1285 else
1883 return; 1286 sock_release(svsk->sk_sock);
1884 } 1287 kfree(svsk);
1885 dprintk("revisit queued\n");
1886 svsk = dr->svsk;
1887 dr->svsk = NULL;
1888 spin_lock(&svsk->sk_lock);
1889 list_add(&dr->handle.recent, &svsk->sk_deferred);
1890 spin_unlock(&svsk->sk_lock);
1891 set_bit(SK_DEFERRED, &svsk->sk_flags);
1892 svc_sock_enqueue(svsk);
1893 svc_sock_put(svsk);
1894}
1895
1896static struct cache_deferred_req *
1897svc_defer(struct cache_req *req)
1898{
1899 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
1900 int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
1901 struct svc_deferred_req *dr;
1902
1903 if (rqstp->rq_arg.page_len)
1904 return NULL; /* if more than a page, give up FIXME */
1905 if (rqstp->rq_deferred) {
1906 dr = rqstp->rq_deferred;
1907 rqstp->rq_deferred = NULL;
1908 } else {
1909 int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1910 /* FIXME maybe discard if size too large */
1911 dr = kmalloc(size, GFP_KERNEL);
1912 if (dr == NULL)
1913 return NULL;
1914
1915 dr->handle.owner = rqstp->rq_server;
1916 dr->prot = rqstp->rq_prot;
1917 memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
1918 dr->addrlen = rqstp->rq_addrlen;
1919 dr->daddr = rqstp->rq_daddr;
1920 dr->argslen = rqstp->rq_arg.len >> 2;
1921 memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
1922 }
1923 atomic_inc(&rqstp->rq_sock->sk_inuse);
1924 dr->svsk = rqstp->rq_sock;
1925
1926 dr->handle.revisit = svc_revisit;
1927 return &dr->handle;
1928}
1929
1930/*
1931 * recv data from a deferred request into an active one
1932 */
1933static int svc_deferred_recv(struct svc_rqst *rqstp)
1934{
1935 struct svc_deferred_req *dr = rqstp->rq_deferred;
1936
1937 rqstp->rq_arg.head[0].iov_base = dr->args;
1938 rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
1939 rqstp->rq_arg.page_len = 0;
1940 rqstp->rq_arg.len = dr->argslen<<2;
1941 rqstp->rq_prot = dr->prot;
1942 memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
1943 rqstp->rq_addrlen = dr->addrlen;
1944 rqstp->rq_daddr = dr->daddr;
1945 rqstp->rq_respages = rqstp->rq_pages;
1946 return dr->argslen<<2;
1947}
1948
1949
1950static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
1951{
1952 struct svc_deferred_req *dr = NULL;
1953
1954 if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
1955 return NULL;
1956 spin_lock(&svsk->sk_lock);
1957 clear_bit(SK_DEFERRED, &svsk->sk_flags);
1958 if (!list_empty(&svsk->sk_deferred)) {
1959 dr = list_entry(svsk->sk_deferred.next,
1960 struct svc_deferred_req,
1961 handle.recent);
1962 list_del_init(&dr->handle.recent);
1963 set_bit(SK_DEFERRED, &svsk->sk_flags);
1964 }
1965 spin_unlock(&svsk->sk_lock);
1966 return dr;
1967} 1288}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 864b541bbf51..0f8c439b848a 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,14 +18,22 @@
18#include <linux/sunrpc/types.h> 18#include <linux/sunrpc/types.h>
19#include <linux/sunrpc/sched.h> 19#include <linux/sunrpc/sched.h>
20#include <linux/sunrpc/stats.h> 20#include <linux/sunrpc/stats.h>
21#include <linux/sunrpc/svc_xprt.h>
21 22
22/* 23/*
23 * Declare the debug flags here 24 * Declare the debug flags here
24 */ 25 */
25unsigned int rpc_debug; 26unsigned int rpc_debug;
27EXPORT_SYMBOL_GPL(rpc_debug);
28
26unsigned int nfs_debug; 29unsigned int nfs_debug;
30EXPORT_SYMBOL_GPL(nfs_debug);
31
27unsigned int nfsd_debug; 32unsigned int nfsd_debug;
33EXPORT_SYMBOL_GPL(nfsd_debug);
34
28unsigned int nlm_debug; 35unsigned int nlm_debug;
36EXPORT_SYMBOL_GPL(nlm_debug);
29 37
30#ifdef RPC_DEBUG 38#ifdef RPC_DEBUG
31 39
@@ -48,6 +56,30 @@ rpc_unregister_sysctl(void)
48 } 56 }
49} 57}
50 58
59static int proc_do_xprt(ctl_table *table, int write, struct file *file,
60 void __user *buffer, size_t *lenp, loff_t *ppos)
61{
62 char tmpbuf[256];
63 int len;
64 if ((*ppos && !write) || !*lenp) {
65 *lenp = 0;
66 return 0;
67 }
68 if (write)
69 return -EINVAL;
70 else {
71 len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
72 if (!access_ok(VERIFY_WRITE, buffer, len))
73 return -EFAULT;
74
75 if (__copy_to_user(buffer, tmpbuf, len))
76 return -EFAULT;
77 }
78 *lenp -= len;
79 *ppos += len;
80 return 0;
81}
82
51static int 83static int
52proc_dodebug(ctl_table *table, int write, struct file *file, 84proc_dodebug(ctl_table *table, int write, struct file *file,
53 void __user *buffer, size_t *lenp, loff_t *ppos) 85 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -87,9 +119,8 @@ proc_dodebug(ctl_table *table, int write, struct file *file,
87 left--, s++; 119 left--, s++;
88 *(unsigned int *) table->data = value; 120 *(unsigned int *) table->data = value;
89 /* Display the RPC tasks on writing to rpc_debug */ 121 /* Display the RPC tasks on writing to rpc_debug */
90 if (table->ctl_name == CTL_RPCDEBUG) { 122 if (strcmp(table->procname, "rpc_debug") == 0)
91 rpc_show_tasks(); 123 rpc_show_tasks();
92 }
93 } else { 124 } else {
94 if (!access_ok(VERIFY_WRITE, buffer, left)) 125 if (!access_ok(VERIFY_WRITE, buffer, left))
95 return -EFAULT; 126 return -EFAULT;
@@ -141,6 +172,12 @@ static ctl_table debug_table[] = {
141 .mode = 0644, 172 .mode = 0644,
142 .proc_handler = &proc_dodebug 173 .proc_handler = &proc_dodebug
143 }, 174 },
175 {
176 .procname = "transports",
177 .maxlen = 256,
178 .mode = 0444,
179 .proc_handler = &proc_do_xprt,
180 },
144 { .ctl_name = 0 } 181 { .ctl_name = 0 }
145}; 182};
146 183
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index fdc5e6d7562b..995c3fdc16c2 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -28,6 +28,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
28 memcpy(p, obj->data, obj->len); 28 memcpy(p, obj->data, obj->len);
29 return p + XDR_QUADLEN(obj->len); 29 return p + XDR_QUADLEN(obj->len);
30} 30}
31EXPORT_SYMBOL(xdr_encode_netobj);
31 32
32__be32 * 33__be32 *
33xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) 34xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
@@ -40,6 +41,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
40 obj->data = (u8 *) p; 41 obj->data = (u8 *) p;
41 return p + XDR_QUADLEN(len); 42 return p + XDR_QUADLEN(len);
42} 43}
44EXPORT_SYMBOL(xdr_decode_netobj);
43 45
44/** 46/**
45 * xdr_encode_opaque_fixed - Encode fixed length opaque data 47 * xdr_encode_opaque_fixed - Encode fixed length opaque data
@@ -91,18 +93,22 @@ xdr_encode_string(__be32 *p, const char *string)
91{ 93{
92 return xdr_encode_array(p, string, strlen(string)); 94 return xdr_encode_array(p, string, strlen(string));
93} 95}
96EXPORT_SYMBOL(xdr_encode_string);
94 97
95__be32 * 98__be32 *
96xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) 99xdr_decode_string_inplace(__be32 *p, char **sp,
100 unsigned int *lenp, unsigned int maxlen)
97{ 101{
98 unsigned int len; 102 u32 len;
99 103
100 if ((len = ntohl(*p++)) > maxlen) 104 len = ntohl(*p++);
105 if (len > maxlen)
101 return NULL; 106 return NULL;
102 *lenp = len; 107 *lenp = len;
103 *sp = (char *) p; 108 *sp = (char *) p;
104 return p + XDR_QUADLEN(len); 109 return p + XDR_QUADLEN(len);
105} 110}
111EXPORT_SYMBOL(xdr_decode_string_inplace);
106 112
107void 113void
108xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 114xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
@@ -130,6 +136,7 @@ xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
130 xdr->buflen += len; 136 xdr->buflen += len;
131 xdr->len += len; 137 xdr->len += len;
132} 138}
139EXPORT_SYMBOL(xdr_encode_pages);
133 140
134void 141void
135xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, 142xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
@@ -151,7 +158,7 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
151 158
152 xdr->buflen += len; 159 xdr->buflen += len;
153} 160}
154 161EXPORT_SYMBOL(xdr_inline_pages);
155 162
156/* 163/*
157 * Helper routines for doing 'memmove' like operations on a struct xdr_buf 164 * Helper routines for doing 'memmove' like operations on a struct xdr_buf
@@ -418,6 +425,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
418{ 425{
419 xdr_shrink_bufhead(buf, len); 426 xdr_shrink_bufhead(buf, len);
420} 427}
428EXPORT_SYMBOL(xdr_shift_buf);
421 429
422/** 430/**
423 * xdr_init_encode - Initialize a struct xdr_stream for sending data. 431 * xdr_init_encode - Initialize a struct xdr_stream for sending data.
@@ -639,6 +647,7 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
639 buf->page_len = 0; 647 buf->page_len = 0;
640 buf->buflen = buf->len = iov->iov_len; 648 buf->buflen = buf->len = iov->iov_len;
641} 649}
650EXPORT_SYMBOL(xdr_buf_from_iov);
642 651
643/* Sets subbuf to the portion of buf of length len beginning base bytes 652/* Sets subbuf to the portion of buf of length len beginning base bytes
644 * from the start of buf. Returns -1 if base of length are out of bounds. */ 653 * from the start of buf. Returns -1 if base of length are out of bounds. */
@@ -687,6 +696,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
687 return -1; 696 return -1;
688 return 0; 697 return 0;
689} 698}
699EXPORT_SYMBOL(xdr_buf_subsegment);
690 700
691static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) 701static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
692{ 702{
@@ -717,6 +727,7 @@ int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, u
717 __read_bytes_from_xdr_buf(&subbuf, obj, len); 727 __read_bytes_from_xdr_buf(&subbuf, obj, len);
718 return 0; 728 return 0;
719} 729}
730EXPORT_SYMBOL(read_bytes_from_xdr_buf);
720 731
721static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) 732static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
722{ 733{
@@ -760,6 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
760 *obj = ntohl(raw); 771 *obj = ntohl(raw);
761 return 0; 772 return 0;
762} 773}
774EXPORT_SYMBOL(xdr_decode_word);
763 775
764int 776int
765xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) 777xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
@@ -768,6 +780,7 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
768 780
769 return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); 781 return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
770} 782}
783EXPORT_SYMBOL(xdr_encode_word);
771 784
772/* If the netobj starting offset bytes from the start of xdr_buf is contained 785/* If the netobj starting offset bytes from the start of xdr_buf is contained
773 * entirely in the head or the tail, set object to point to it; otherwise 786 * entirely in the head or the tail, set object to point to it; otherwise
@@ -805,6 +818,7 @@ int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned in
805 __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len); 818 __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
806 return 0; 819 return 0;
807} 820}
821EXPORT_SYMBOL(xdr_buf_read_netobj);
808 822
809/* Returns 0 on success, or else a negative error code. */ 823/* Returns 0 on success, or else a negative error code. */
810static int 824static int
@@ -1010,6 +1024,7 @@ xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
1010 1024
1011 return xdr_xcode_array2(buf, base, desc, 0); 1025 return xdr_xcode_array2(buf, base, desc, 0);
1012} 1026}
1027EXPORT_SYMBOL(xdr_decode_array2);
1013 1028
1014int 1029int
1015xdr_encode_array2(struct xdr_buf *buf, unsigned int base, 1030xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
@@ -1021,6 +1036,7 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
1021 1036
1022 return xdr_xcode_array2(buf, base, desc, 1); 1037 return xdr_xcode_array2(buf, base, desc, 1);
1023} 1038}
1039EXPORT_SYMBOL(xdr_encode_array2);
1024 1040
1025int 1041int
1026xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, 1042xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 282a9a2ec90c..cfcade906a56 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,7 +62,7 @@ static inline void do_xprt_reserve(struct rpc_task *);
62static void xprt_connect_status(struct rpc_task *task); 62static void xprt_connect_status(struct rpc_task *task);
63static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 63static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
64 64
65static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; 65static DEFINE_SPINLOCK(xprt_list_lock);
66static LIST_HEAD(xprt_list); 66static LIST_HEAD(xprt_list);
67 67
68/* 68/*
@@ -501,9 +501,10 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
501void xprt_set_retrans_timeout_rtt(struct rpc_task *task) 501void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
502{ 502{
503 int timer = task->tk_msg.rpc_proc->p_timer; 503 int timer = task->tk_msg.rpc_proc->p_timer;
504 struct rpc_rtt *rtt = task->tk_client->cl_rtt; 504 struct rpc_clnt *clnt = task->tk_client;
505 struct rpc_rtt *rtt = clnt->cl_rtt;
505 struct rpc_rqst *req = task->tk_rqstp; 506 struct rpc_rqst *req = task->tk_rqstp;
506 unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; 507 unsigned long max_timeout = clnt->cl_timeout->to_maxval;
507 508
508 task->tk_timeout = rpc_calc_rto(rtt, timer); 509 task->tk_timeout = rpc_calc_rto(rtt, timer);
509 task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; 510 task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
@@ -514,7 +515,7 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
514 515
515static void xprt_reset_majortimeo(struct rpc_rqst *req) 516static void xprt_reset_majortimeo(struct rpc_rqst *req)
516{ 517{
517 struct rpc_timeout *to = &req->rq_xprt->timeout; 518 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
518 519
519 req->rq_majortimeo = req->rq_timeout; 520 req->rq_majortimeo = req->rq_timeout;
520 if (to->to_exponential) 521 if (to->to_exponential)
@@ -534,7 +535,7 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
534int xprt_adjust_timeout(struct rpc_rqst *req) 535int xprt_adjust_timeout(struct rpc_rqst *req)
535{ 536{
536 struct rpc_xprt *xprt = req->rq_xprt; 537 struct rpc_xprt *xprt = req->rq_xprt;
537 struct rpc_timeout *to = &xprt->timeout; 538 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
538 int status = 0; 539 int status = 0;
539 540
540 if (time_before(jiffies, req->rq_majortimeo)) { 541 if (time_before(jiffies, req->rq_majortimeo)) {
@@ -568,17 +569,17 @@ static void xprt_autoclose(struct work_struct *work)
568 struct rpc_xprt *xprt = 569 struct rpc_xprt *xprt =
569 container_of(work, struct rpc_xprt, task_cleanup); 570 container_of(work, struct rpc_xprt, task_cleanup);
570 571
571 xprt_disconnect(xprt);
572 xprt->ops->close(xprt); 572 xprt->ops->close(xprt);
573 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
573 xprt_release_write(xprt, NULL); 574 xprt_release_write(xprt, NULL);
574} 575}
575 576
576/** 577/**
577 * xprt_disconnect - mark a transport as disconnected 578 * xprt_disconnect_done - mark a transport as disconnected
578 * @xprt: transport to flag for disconnect 579 * @xprt: transport to flag for disconnect
579 * 580 *
580 */ 581 */
581void xprt_disconnect(struct rpc_xprt *xprt) 582void xprt_disconnect_done(struct rpc_xprt *xprt)
582{ 583{
583 dprintk("RPC: disconnected transport %p\n", xprt); 584 dprintk("RPC: disconnected transport %p\n", xprt);
584 spin_lock_bh(&xprt->transport_lock); 585 spin_lock_bh(&xprt->transport_lock);
@@ -586,7 +587,26 @@ void xprt_disconnect(struct rpc_xprt *xprt)
586 xprt_wake_pending_tasks(xprt, -ENOTCONN); 587 xprt_wake_pending_tasks(xprt, -ENOTCONN);
587 spin_unlock_bh(&xprt->transport_lock); 588 spin_unlock_bh(&xprt->transport_lock);
588} 589}
589EXPORT_SYMBOL_GPL(xprt_disconnect); 590EXPORT_SYMBOL_GPL(xprt_disconnect_done);
591
592/**
593 * xprt_force_disconnect - force a transport to disconnect
594 * @xprt: transport to disconnect
595 *
596 */
597void xprt_force_disconnect(struct rpc_xprt *xprt)
598{
599 /* Don't race with the test_bit() in xprt_clear_locked() */
600 spin_lock_bh(&xprt->transport_lock);
601 set_bit(XPRT_CLOSE_WAIT, &xprt->state);
602 /* Try to schedule an autoclose RPC call */
603 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
604 queue_work(rpciod_workqueue, &xprt->task_cleanup);
605 else if (xprt->snd_task != NULL)
606 rpc_wake_up_task(xprt->snd_task);
607 spin_unlock_bh(&xprt->transport_lock);
608}
609EXPORT_SYMBOL_GPL(xprt_force_disconnect);
590 610
591static void 611static void
592xprt_init_autodisconnect(unsigned long data) 612xprt_init_autodisconnect(unsigned long data)
@@ -909,7 +929,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
909{ 929{
910 struct rpc_rqst *req = task->tk_rqstp; 930 struct rpc_rqst *req = task->tk_rqstp;
911 931
912 req->rq_timeout = xprt->timeout.to_initval; 932 req->rq_timeout = task->tk_client->cl_timeout->to_initval;
913 req->rq_task = task; 933 req->rq_task = task;
914 req->rq_xprt = xprt; 934 req->rq_xprt = xprt;
915 req->rq_buffer = NULL; 935 req->rq_buffer = NULL;
@@ -959,22 +979,6 @@ void xprt_release(struct rpc_task *task)
959} 979}
960 980
961/** 981/**
962 * xprt_set_timeout - set constant RPC timeout
963 * @to: RPC timeout parameters to set up
964 * @retr: number of retries
965 * @incr: amount of increase after each retry
966 *
967 */
968void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
969{
970 to->to_initval =
971 to->to_increment = incr;
972 to->to_maxval = to->to_initval + (incr * retr);
973 to->to_retries = retr;
974 to->to_exponential = 0;
975}
976
977/**
978 * xprt_create_transport - create an RPC transport 982 * xprt_create_transport - create an RPC transport
979 * @args: rpc transport creation arguments 983 * @args: rpc transport creation arguments
980 * 984 *
@@ -1011,9 +1015,8 @@ found:
1011 INIT_LIST_HEAD(&xprt->free); 1015 INIT_LIST_HEAD(&xprt->free);
1012 INIT_LIST_HEAD(&xprt->recv); 1016 INIT_LIST_HEAD(&xprt->recv);
1013 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1017 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1014 init_timer(&xprt->timer); 1018 setup_timer(&xprt->timer, xprt_init_autodisconnect,
1015 xprt->timer.function = xprt_init_autodisconnect; 1019 (unsigned long)xprt);
1016 xprt->timer.data = (unsigned long) xprt;
1017 xprt->last_used = jiffies; 1020 xprt->last_used = jiffies;
1018 xprt->cwnd = RPC_INITCWND; 1021 xprt->cwnd = RPC_INITCWND;
1019 xprt->bind_index = 0; 1022 xprt->bind_index = 0;
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 264f0feeb513..5a8f268bdd30 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,3 +1,8 @@
1obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o 1obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
2 2
3xprtrdma-y := transport.o rpc_rdma.o verbs.o 3xprtrdma-y := transport.o rpc_rdma.o verbs.o
4
5obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
6
7svcrdma-y := svc_rdma.o svc_rdma_transport.o \
8 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 12db63580427..e55427f73dfe 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -83,7 +83,7 @@ static const char transfertypes[][12] = {
83 */ 83 */
84 84
85static int 85static int
86rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, 86rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) 87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
88{ 88{
89 int len, n = 0, p; 89 int len, n = 0, p;
@@ -92,7 +92,6 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
92 seg[n].mr_page = NULL; 92 seg[n].mr_page = NULL;
93 seg[n].mr_offset = xdrbuf->head[0].iov_base; 93 seg[n].mr_offset = xdrbuf->head[0].iov_base;
94 seg[n].mr_len = xdrbuf->head[0].iov_len; 94 seg[n].mr_len = xdrbuf->head[0].iov_len;
95 pos += xdrbuf->head[0].iov_len;
96 ++n; 95 ++n;
97 } 96 }
98 97
@@ -104,7 +103,6 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
104 seg[n].mr_len = min_t(u32, 103 seg[n].mr_len = min_t(u32,
105 PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); 104 PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
106 len = xdrbuf->page_len - seg[n].mr_len; 105 len = xdrbuf->page_len - seg[n].mr_len;
107 pos += len;
108 ++n; 106 ++n;
109 p = 1; 107 p = 1;
110 while (len > 0) { 108 while (len > 0) {
@@ -119,20 +117,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
119 } 117 }
120 } 118 }
121 119
122 if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { 120 if (xdrbuf->tail[0].iov_len) {
123 if (n == nsegs) 121 if (n == nsegs)
124 return 0; 122 return 0;
125 seg[n].mr_page = NULL; 123 seg[n].mr_page = NULL;
126 seg[n].mr_offset = xdrbuf->tail[0].iov_base; 124 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
127 seg[n].mr_len = xdrbuf->tail[0].iov_len; 125 seg[n].mr_len = xdrbuf->tail[0].iov_len;
128 pos += xdrbuf->tail[0].iov_len;
129 ++n; 126 ++n;
130 } 127 }
131 128
132 if (pos < xdrbuf->len)
133 dprintk("RPC: %s: marshaled only %d of %d\n",
134 __func__, pos, xdrbuf->len);
135
136 return n; 129 return n;
137} 130}
138 131
@@ -176,12 +169,12 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
176 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 169 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
177 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); 170 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
178 int nsegs, nchunks = 0; 171 int nsegs, nchunks = 0;
179 int pos; 172 unsigned int pos;
180 struct rpcrdma_mr_seg *seg = req->rl_segments; 173 struct rpcrdma_mr_seg *seg = req->rl_segments;
181 struct rpcrdma_read_chunk *cur_rchunk = NULL; 174 struct rpcrdma_read_chunk *cur_rchunk = NULL;
182 struct rpcrdma_write_array *warray = NULL; 175 struct rpcrdma_write_array *warray = NULL;
183 struct rpcrdma_write_chunk *cur_wchunk = NULL; 176 struct rpcrdma_write_chunk *cur_wchunk = NULL;
184 u32 *iptr = headerp->rm_body.rm_chunks; 177 __be32 *iptr = headerp->rm_body.rm_chunks;
185 178
186 if (type == rpcrdma_readch || type == rpcrdma_areadch) { 179 if (type == rpcrdma_readch || type == rpcrdma_areadch) {
187 /* a read chunk - server will RDMA Read our memory */ 180 /* a read chunk - server will RDMA Read our memory */
@@ -217,25 +210,25 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
217 cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); 210 cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
218 cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); 211 cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
219 xdr_encode_hyper( 212 xdr_encode_hyper(
220 (u32 *)&cur_rchunk->rc_target.rs_offset, 213 (__be32 *)&cur_rchunk->rc_target.rs_offset,
221 seg->mr_base); 214 seg->mr_base);
222 dprintk("RPC: %s: read chunk " 215 dprintk("RPC: %s: read chunk "
223 "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, 216 "elem %d@0x%llx:0x%x pos %u (%s)\n", __func__,
224 seg->mr_len, seg->mr_base, seg->mr_rkey, pos, 217 seg->mr_len, (unsigned long long)seg->mr_base,
225 n < nsegs ? "more" : "last"); 218 seg->mr_rkey, pos, n < nsegs ? "more" : "last");
226 cur_rchunk++; 219 cur_rchunk++;
227 r_xprt->rx_stats.read_chunk_count++; 220 r_xprt->rx_stats.read_chunk_count++;
228 } else { /* write/reply */ 221 } else { /* write/reply */
229 cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); 222 cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
230 cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); 223 cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
231 xdr_encode_hyper( 224 xdr_encode_hyper(
232 (u32 *)&cur_wchunk->wc_target.rs_offset, 225 (__be32 *)&cur_wchunk->wc_target.rs_offset,
233 seg->mr_base); 226 seg->mr_base);
234 dprintk("RPC: %s: %s chunk " 227 dprintk("RPC: %s: %s chunk "
235 "elem %d@0x%llx:0x%x (%s)\n", __func__, 228 "elem %d@0x%llx:0x%x (%s)\n", __func__,
236 (type == rpcrdma_replych) ? "reply" : "write", 229 (type == rpcrdma_replych) ? "reply" : "write",
237 seg->mr_len, seg->mr_base, seg->mr_rkey, 230 seg->mr_len, (unsigned long long)seg->mr_base,
238 n < nsegs ? "more" : "last"); 231 seg->mr_rkey, n < nsegs ? "more" : "last");
239 cur_wchunk++; 232 cur_wchunk++;
240 if (type == rpcrdma_replych) 233 if (type == rpcrdma_replych)
241 r_xprt->rx_stats.reply_chunk_count++; 234 r_xprt->rx_stats.reply_chunk_count++;
@@ -257,14 +250,14 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
257 * finish off header. If write, marshal discrim and nchunks. 250 * finish off header. If write, marshal discrim and nchunks.
258 */ 251 */
259 if (cur_rchunk) { 252 if (cur_rchunk) {
260 iptr = (u32 *) cur_rchunk; 253 iptr = (__be32 *) cur_rchunk;
261 *iptr++ = xdr_zero; /* finish the read chunk list */ 254 *iptr++ = xdr_zero; /* finish the read chunk list */
262 *iptr++ = xdr_zero; /* encode a NULL write chunk list */ 255 *iptr++ = xdr_zero; /* encode a NULL write chunk list */
263 *iptr++ = xdr_zero; /* encode a NULL reply chunk */ 256 *iptr++ = xdr_zero; /* encode a NULL reply chunk */
264 } else { 257 } else {
265 warray->wc_discrim = xdr_one; 258 warray->wc_discrim = xdr_one;
266 warray->wc_nchunks = htonl(nchunks); 259 warray->wc_nchunks = htonl(nchunks);
267 iptr = (u32 *) cur_wchunk; 260 iptr = (__be32 *) cur_wchunk;
268 if (type == rpcrdma_writech) { 261 if (type == rpcrdma_writech) {
269 *iptr++ = xdr_zero; /* finish the write chunk list */ 262 *iptr++ = xdr_zero; /* finish the write chunk list */
270 *iptr++ = xdr_zero; /* encode a NULL reply chunk */ 263 *iptr++ = xdr_zero; /* encode a NULL reply chunk */
@@ -387,7 +380,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
387 headerp->rm_xid = rqst->rq_xid; 380 headerp->rm_xid = rqst->rq_xid;
388 headerp->rm_vers = xdr_one; 381 headerp->rm_vers = xdr_one;
389 headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); 382 headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
390 headerp->rm_type = __constant_htonl(RDMA_MSG); 383 headerp->rm_type = htonl(RDMA_MSG);
391 384
392 /* 385 /*
393 * Chunks needed for results? 386 * Chunks needed for results?
@@ -465,11 +458,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
465 RPCRDMA_INLINE_PAD_VALUE(rqst)); 458 RPCRDMA_INLINE_PAD_VALUE(rqst));
466 459
467 if (padlen) { 460 if (padlen) {
468 headerp->rm_type = __constant_htonl(RDMA_MSGP); 461 headerp->rm_type = htonl(RDMA_MSGP);
469 headerp->rm_body.rm_padded.rm_align = 462 headerp->rm_body.rm_padded.rm_align =
470 htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); 463 htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
471 headerp->rm_body.rm_padded.rm_thresh = 464 headerp->rm_body.rm_padded.rm_thresh =
472 __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); 465 htonl(RPCRDMA_INLINE_PAD_THRESH);
473 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; 466 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
474 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; 467 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
475 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; 468 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
@@ -559,7 +552,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
559 * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) 552 * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
560 */ 553 */
561static int 554static int
562rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp) 555rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp)
563{ 556{
564 unsigned int i, total_len; 557 unsigned int i, total_len;
565 struct rpcrdma_write_chunk *cur_wchunk; 558 struct rpcrdma_write_chunk *cur_wchunk;
@@ -573,11 +566,11 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
573 struct rpcrdma_segment *seg = &cur_wchunk->wc_target; 566 struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
574 ifdebug(FACILITY) { 567 ifdebug(FACILITY) {
575 u64 off; 568 u64 off;
576 xdr_decode_hyper((u32 *)&seg->rs_offset, &off); 569 xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
577 dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", 570 dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
578 __func__, 571 __func__,
579 ntohl(seg->rs_length), 572 ntohl(seg->rs_length),
580 off, 573 (unsigned long long)off,
581 ntohl(seg->rs_handle)); 574 ntohl(seg->rs_handle));
582 } 575 }
583 total_len += ntohl(seg->rs_length); 576 total_len += ntohl(seg->rs_length);
@@ -585,7 +578,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
585 } 578 }
586 /* check and adjust for properly terminated write chunk */ 579 /* check and adjust for properly terminated write chunk */
587 if (wrchunk) { 580 if (wrchunk) {
588 u32 *w = (u32 *) cur_wchunk; 581 __be32 *w = (__be32 *) cur_wchunk;
589 if (*w++ != xdr_zero) 582 if (*w++ != xdr_zero)
590 return -1; 583 return -1;
591 cur_wchunk = (struct rpcrdma_write_chunk *) w; 584 cur_wchunk = (struct rpcrdma_write_chunk *) w;
@@ -593,7 +586,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
593 if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) 586 if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
594 return -1; 587 return -1;
595 588
596 *iptrp = (u32 *) cur_wchunk; 589 *iptrp = (__be32 *) cur_wchunk;
597 return total_len; 590 return total_len;
598} 591}
599 592
@@ -721,7 +714,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
721 struct rpc_rqst *rqst; 714 struct rpc_rqst *rqst;
722 struct rpc_xprt *xprt = rep->rr_xprt; 715 struct rpc_xprt *xprt = rep->rr_xprt;
723 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 716 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
724 u32 *iptr; 717 __be32 *iptr;
725 int i, rdmalen, status; 718 int i, rdmalen, status;
726 719
727 /* Check status. If bad, signal disconnect and return rep to pool */ 720 /* Check status. If bad, signal disconnect and return rep to pool */
@@ -801,7 +794,7 @@ repost:
801 r_xprt->rx_stats.total_rdma_reply += rdmalen; 794 r_xprt->rx_stats.total_rdma_reply += rdmalen;
802 } else { 795 } else {
803 /* else ordinary inline */ 796 /* else ordinary inline */
804 iptr = (u32 *)((unsigned char *)headerp + 28); 797 iptr = (__be32 *)((unsigned char *)headerp + 28);
805 rep->rr_len -= 28; /*sizeof *headerp;*/ 798 rep->rr_len -= 28; /*sizeof *headerp;*/
806 status = rep->rr_len; 799 status = rep->rr_len;
807 } 800 }
@@ -816,7 +809,7 @@ repost:
816 headerp->rm_body.rm_chunks[2] != xdr_one || 809 headerp->rm_body.rm_chunks[2] != xdr_one ||
817 req->rl_nchunks == 0) 810 req->rl_nchunks == 0)
818 goto badheader; 811 goto badheader;
819 iptr = (u32 *)((unsigned char *)headerp + 28); 812 iptr = (__be32 *)((unsigned char *)headerp + 28);
820 rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); 813 rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
821 if (rdmalen < 0) 814 if (rdmalen < 0)
822 goto badheader; 815 goto badheader;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
new file mode 100644
index 000000000000..88c0ca20bb1e
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -0,0 +1,266 @@
1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/fs.h>
44#include <linux/sysctl.h>
45#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/sched.h>
47#include <linux/sunrpc/svc_rdma.h>
48
49#define RPCDBG_FACILITY RPCDBG_SVCXPRT
50
51/* RPC/RDMA parameters */
52unsigned int svcrdma_ord = RPCRDMA_ORD;
53static unsigned int min_ord = 1;
54static unsigned int max_ord = 4096;
55unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
56static unsigned int min_max_requests = 4;
57static unsigned int max_max_requests = 16384;
58unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
59static unsigned int min_max_inline = 4096;
60static unsigned int max_max_inline = 65536;
61
62atomic_t rdma_stat_recv;
63atomic_t rdma_stat_read;
64atomic_t rdma_stat_write;
65atomic_t rdma_stat_sq_starve;
66atomic_t rdma_stat_rq_starve;
67atomic_t rdma_stat_rq_poll;
68atomic_t rdma_stat_rq_prod;
69atomic_t rdma_stat_sq_poll;
70atomic_t rdma_stat_sq_prod;
71
72/*
73 * This function implements reading and resetting an atomic_t stat
74 * variable through read/write to a proc file. Any write to the file
75 * resets the associated statistic to zero. Any read returns it's
76 * current value.
77 */
78static int read_reset_stat(ctl_table *table, int write,
79 struct file *filp, void __user *buffer, size_t *lenp,
80 loff_t *ppos)
81{
82 atomic_t *stat = (atomic_t *)table->data;
83
84 if (!stat)
85 return -EINVAL;
86
87 if (write)
88 atomic_set(stat, 0);
89 else {
90 char str_buf[32];
91 char *data;
92 int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
93 if (len >= 32)
94 return -EFAULT;
95 len = strlen(str_buf);
96 if (*ppos > len) {
97 *lenp = 0;
98 return 0;
99 }
100 data = &str_buf[*ppos];
101 len -= *ppos;
102 if (len > *lenp)
103 len = *lenp;
104 if (len && copy_to_user(buffer, str_buf, len))
105 return -EFAULT;
106 *lenp = len;
107 *ppos += len;
108 }
109 return 0;
110}
111
112static struct ctl_table_header *svcrdma_table_header;
113static ctl_table svcrdma_parm_table[] = {
114 {
115 .procname = "max_requests",
116 .data = &svcrdma_max_requests,
117 .maxlen = sizeof(unsigned int),
118 .mode = 0644,
119 .proc_handler = &proc_dointvec_minmax,
120 .strategy = &sysctl_intvec,
121 .extra1 = &min_max_requests,
122 .extra2 = &max_max_requests
123 },
124 {
125 .procname = "max_req_size",
126 .data = &svcrdma_max_req_size,
127 .maxlen = sizeof(unsigned int),
128 .mode = 0644,
129 .proc_handler = &proc_dointvec_minmax,
130 .strategy = &sysctl_intvec,
131 .extra1 = &min_max_inline,
132 .extra2 = &max_max_inline
133 },
134 {
135 .procname = "max_outbound_read_requests",
136 .data = &svcrdma_ord,
137 .maxlen = sizeof(unsigned int),
138 .mode = 0644,
139 .proc_handler = &proc_dointvec_minmax,
140 .strategy = &sysctl_intvec,
141 .extra1 = &min_ord,
142 .extra2 = &max_ord,
143 },
144
145 {
146 .procname = "rdma_stat_read",
147 .data = &rdma_stat_read,
148 .maxlen = sizeof(atomic_t),
149 .mode = 0644,
150 .proc_handler = &read_reset_stat,
151 },
152 {
153 .procname = "rdma_stat_recv",
154 .data = &rdma_stat_recv,
155 .maxlen = sizeof(atomic_t),
156 .mode = 0644,
157 .proc_handler = &read_reset_stat,
158 },
159 {
160 .procname = "rdma_stat_write",
161 .data = &rdma_stat_write,
162 .maxlen = sizeof(atomic_t),
163 .mode = 0644,
164 .proc_handler = &read_reset_stat,
165 },
166 {
167 .procname = "rdma_stat_sq_starve",
168 .data = &rdma_stat_sq_starve,
169 .maxlen = sizeof(atomic_t),
170 .mode = 0644,
171 .proc_handler = &read_reset_stat,
172 },
173 {
174 .procname = "rdma_stat_rq_starve",
175 .data = &rdma_stat_rq_starve,
176 .maxlen = sizeof(atomic_t),
177 .mode = 0644,
178 .proc_handler = &read_reset_stat,
179 },
180 {
181 .procname = "rdma_stat_rq_poll",
182 .data = &rdma_stat_rq_poll,
183 .maxlen = sizeof(atomic_t),
184 .mode = 0644,
185 .proc_handler = &read_reset_stat,
186 },
187 {
188 .procname = "rdma_stat_rq_prod",
189 .data = &rdma_stat_rq_prod,
190 .maxlen = sizeof(atomic_t),
191 .mode = 0644,
192 .proc_handler = &read_reset_stat,
193 },
194 {
195 .procname = "rdma_stat_sq_poll",
196 .data = &rdma_stat_sq_poll,
197 .maxlen = sizeof(atomic_t),
198 .mode = 0644,
199 .proc_handler = &read_reset_stat,
200 },
201 {
202 .procname = "rdma_stat_sq_prod",
203 .data = &rdma_stat_sq_prod,
204 .maxlen = sizeof(atomic_t),
205 .mode = 0644,
206 .proc_handler = &read_reset_stat,
207 },
208 {
209 .ctl_name = 0,
210 },
211};
212
213static ctl_table svcrdma_table[] = {
214 {
215 .procname = "svc_rdma",
216 .mode = 0555,
217 .child = svcrdma_parm_table
218 },
219 {
220 .ctl_name = 0,
221 },
222};
223
224static ctl_table svcrdma_root_table[] = {
225 {
226 .ctl_name = CTL_SUNRPC,
227 .procname = "sunrpc",
228 .mode = 0555,
229 .child = svcrdma_table
230 },
231 {
232 .ctl_name = 0,
233 },
234};
235
236void svc_rdma_cleanup(void)
237{
238 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
239 if (svcrdma_table_header) {
240 unregister_sysctl_table(svcrdma_table_header);
241 svcrdma_table_header = NULL;
242 }
243 svc_unreg_xprt_class(&svc_rdma_class);
244}
245
246int svc_rdma_init(void)
247{
248 dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
249 dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
250 dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
251 dprintk("\tsq_depth : %d\n",
252 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
253 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
254 if (!svcrdma_table_header)
255 svcrdma_table_header =
256 register_sysctl_table(svcrdma_root_table);
257
258 /* Register RDMA with the SVC transport switch */
259 svc_reg_xprt_class(&svc_rdma_class);
260 return 0;
261}
262MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
263MODULE_DESCRIPTION("SVC RDMA Transport");
264MODULE_LICENSE("Dual BSD/GPL");
265module_init(svc_rdma_init);
266module_exit(svc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
new file mode 100644
index 000000000000..9530ef2d40dc
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -0,0 +1,412 @@
1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41
42#include <linux/sunrpc/xdr.h>
43#include <linux/sunrpc/debug.h>
44#include <asm/unaligned.h>
45#include <linux/sunrpc/rpc_rdma.h>
46#include <linux/sunrpc/svc_rdma.h>
47
48#define RPCDBG_FACILITY RPCDBG_SVCXPRT
49
50/*
51 * Decodes a read chunk list. The expected format is as follows:
52 * descrim : xdr_one
53 * position : u32 offset into XDR stream
54 * handle : u32 RKEY
55 * . . .
56 * end-of-list: xdr_zero
57 */
58static u32 *decode_read_list(u32 *va, u32 *vaend)
59{
60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61
62 while (ch->rc_discrim != xdr_zero) {
63 u64 ch_offset;
64
65 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
66 (unsigned long)vaend) {
67 dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
68 return NULL;
69 }
70
71 ch->rc_discrim = ntohl(ch->rc_discrim);
72 ch->rc_position = ntohl(ch->rc_position);
73 ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
74 ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);
75 va = (u32 *)&ch->rc_target.rs_offset;
76 xdr_decode_hyper(va, &ch_offset);
77 put_unaligned(ch_offset, (u64 *)va);
78 ch++;
79 }
80 return (u32 *)&ch->rc_position;
81}
82
83/*
84 * Determine number of chunks and total bytes in chunk list. The chunk
85 * list has already been verified to fit within the RPCRDMA header.
86 */
87void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
88 int *ch_count, int *byte_count)
89{
90 /* compute the number of bytes represented by read chunks */
91 *byte_count = 0;
92 *ch_count = 0;
93 for (; ch->rc_discrim != 0; ch++) {
94 *byte_count = *byte_count + ch->rc_target.rs_length;
95 *ch_count = *ch_count + 1;
96 }
97}
98
99/*
100 * Decodes a write chunk list. The expected format is as follows:
101 * descrim : xdr_one
102 * nchunks : <count>
103 * handle : u32 RKEY ---+
104 * length : u32 <len of segment> |
105 * offset : remove va + <count>
106 * . . . |
107 * ---+
108 */
109static u32 *decode_write_list(u32 *va, u32 *vaend)
110{
111 int ch_no;
112 struct rpcrdma_write_array *ary =
113 (struct rpcrdma_write_array *)va;
114
115 /* Check for not write-array */
116 if (ary->wc_discrim == xdr_zero)
117 return (u32 *)&ary->wc_nchunks;
118
119 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
120 (unsigned long)vaend) {
121 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
122 return NULL;
123 }
124 ary->wc_discrim = ntohl(ary->wc_discrim);
125 ary->wc_nchunks = ntohl(ary->wc_nchunks);
126 if (((unsigned long)&ary->wc_array[0] +
127 (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
128 (unsigned long)vaend) {
129 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
130 ary, ary->wc_nchunks, vaend);
131 return NULL;
132 }
133 for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
134 u64 ch_offset;
135
136 ary->wc_array[ch_no].wc_target.rs_handle =
137 ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
138 ary->wc_array[ch_no].wc_target.rs_length =
139 ntohl(ary->wc_array[ch_no].wc_target.rs_length);
140 va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
141 xdr_decode_hyper(va, &ch_offset);
142 put_unaligned(ch_offset, (u64 *)va);
143 }
144
145 /*
146 * rs_length is the 2nd 4B field in wc_target and taking its
147 * address skips the list terminator
148 */
149 return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
150}
151
/*
 * Decode the wire-format reply array in place, converting each field
 * from network to host byte order.  Returns a pointer just past the
 * array on success, a pointer past the zero discriminator when no
 * reply-array is present, or NULL when the claimed array would run
 * past vaend.
 * NOTE(review): wc_nchunks comes straight off the wire; the multiply
 * in the second bounds check could wrap on a hostile value — confirm
 * an upper bound is enforced by the caller or the struct definition.
 */
152static u32 *decode_reply_array(u32 *va, u32 *vaend)
153{
154 int ch_no;
155 struct rpcrdma_write_array *ary =
156 (struct rpcrdma_write_array *)va;
157
158 /* Check for no reply-array */
159 if (ary->wc_discrim == xdr_zero)
160 return (u32 *)&ary->wc_nchunks;
161
162 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
163 (unsigned long)vaend) {
164 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
165 return NULL;
166 }
167 ary->wc_discrim = ntohl(ary->wc_discrim);
168 ary->wc_nchunks = ntohl(ary->wc_nchunks);
169 if (((unsigned long)&ary->wc_array[0] +
170 (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
171 (unsigned long)vaend) {
172 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
173 ary, ary->wc_nchunks, vaend);
174 return NULL;
175 }
 /* Byte-swap each segment; rs_offset is a 64-bit hyper decoded via
  * xdr_decode_hyper and stored back with put_unaligned because the
  * field may not be naturally aligned. */
176 for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
177 u64 ch_offset;
178
179 ary->wc_array[ch_no].wc_target.rs_handle =
180 ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
181 ary->wc_array[ch_no].wc_target.rs_length =
182 ntohl(ary->wc_array[ch_no].wc_target.rs_length);
183 va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
184 xdr_decode_hyper(va, &ch_offset);
185 put_unaligned(ch_offset, (u64 *)va);
186 }
187
188 return (u32 *)&ary->wc_array[ch_no];
189}
190
/*
 * Decode (byte-swap in place) the RPC-over-RDMA transport header at
 * the start of rq_arg and walk the three chunk lists (read, write,
 * reply).  On success *rdma_req points at the decoded header and the
 * header length is returned; rq_arg.head is advanced past the header.
 * Returns -EINVAL on a malformed header and -ENOSYS on a version
 * mismatch (the caller is expected to reply with ERR_VERS).
 */
191int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
192 struct svc_rqst *rqstp)
193{
194 struct rpcrdma_msg *rmsgp = NULL;
195 u32 *va;
196 u32 *vaend;
197 u32 hdr_len;
198
199 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
200
201 /* Verify that there's enough bytes for header + something */
202 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
203 dprintk("svcrdma: header too short = %d\n",
204 rqstp->rq_arg.len);
205 return -EINVAL;
206 }
207
208 /* Decode the header */
209 rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
210 rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
211 rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
212 rmsgp->rm_type = ntohl(rmsgp->rm_type);
213
214 if (rmsgp->rm_vers != RPCRDMA_VERSION)
215 return -ENOSYS;
216
217 /* Pull in the extra for the padded case and bump our pointer */
218 if (rmsgp->rm_type == RDMA_MSGP) {
219 int hdrlen;
220 rmsgp->rm_body.rm_padded.rm_align =
221 ntohl(rmsgp->rm_body.rm_padded.rm_align);
222 rmsgp->rm_body.rm_padded.rm_thresh =
223 ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
224
 /* NOTE(review): iov_len is reduced before the hdrlen-vs-len
  * sanity check below; if the check fails, head[0] has already
  * been modified — confirm callers treat -EINVAL as fatal. */
225 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
226 rqstp->rq_arg.head[0].iov_base = va;
227 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
228 rqstp->rq_arg.head[0].iov_len -= hdrlen;
229 if (hdrlen > rqstp->rq_arg.len)
230 return -EINVAL;
231 return hdrlen;
232 }
233
234 /* The chunk list may contain either a read chunk list or a write
235 * chunk list and a reply chunk list.
236 */
237 va = &rmsgp->rm_body.rm_chunks[0];
238 vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
239 va = decode_read_list(va, vaend);
240 if (!va)
241 return -EINVAL;
242 va = decode_write_list(va, vaend);
243 if (!va)
244 return -EINVAL;
245 va = decode_reply_array(va, vaend);
246 if (!va)
247 return -EINVAL;
248
249 rqstp->rq_arg.head[0].iov_base = va;
250 hdr_len = (unsigned long)va - (unsigned long)rmsgp;
251 rqstp->rq_arg.head[0].iov_len -= hdr_len;
252
253 *rdma_req = rmsgp;
254 return hdr_len;
255}
256
/*
 * Re-locate the RPC message inside a header that was already decoded
 * (byte-swapped) on a previous pass, e.g. after a deferred RDMA_READ
 * completes.  Only skips over the chunk lists — no validation or
 * byte-swapping is performed here.  Returns the header length and
 * advances rq_arg.head past it.
 */
257int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
258{
259 struct rpcrdma_msg *rmsgp = NULL;
260 struct rpcrdma_read_chunk *ch;
261 struct rpcrdma_write_array *ary;
262 u32 *va;
263 u32 hdrlen;
264
265 dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
266 rqstp);
267 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
268
269 /* Pull in the extra for the padded case and bump our pointer */
270 if (rmsgp->rm_type == RDMA_MSGP) {
271 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
272 rqstp->rq_arg.head[0].iov_base = va;
273 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
274 rqstp->rq_arg.head[0].iov_len -= hdrlen;
275 return hdrlen;
276 }
277
278 /*
279 * Skip all chunks to find RPC msg. These were previously processed
280 */
281 va = &rmsgp->rm_body.rm_chunks[0];
282
283 /* Skip read-list */
284 for (ch = (struct rpcrdma_read_chunk *)va;
285 ch->rc_discrim != xdr_zero; ch++);
286 va = (u32 *)&ch->rc_position;
287
288 /* Skip write-list */
289 ary = (struct rpcrdma_write_array *)va;
290 if (ary->wc_discrim == xdr_zero)
291 va = (u32 *)&ary->wc_nchunks;
292 else
293 /*
294 * rs_length is the 2nd 4B field in wc_target and taking its
295 * address skips the list terminator
296 */
297 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
298
299 /* Skip reply-array */
300 ary = (struct rpcrdma_write_array *)va;
301 if (ary->wc_discrim == xdr_zero)
302 va = (u32 *)&ary->wc_nchunks;
303 else
304 va = (u32 *)&ary->wc_array[ary->wc_nchunks];
305
306 rqstp->rq_arg.head[0].iov_base = va;
307 hdrlen = (unsigned long)va - (unsigned long)rmsgp;
308 rqstp->rq_arg.head[0].iov_len -= hdrlen;
309
310 return hdrlen;
311}
312
/*
 * Build an RDMA_ERROR reply header at *va, echoing the xid/version
 * from the faulting request.  For ERR_VERS the supported version
 * range (low, high — here both RPCRDMA_VERSION) is appended per the
 * RPC-over-RDMA error format.  Returns the encoded length in bytes.
 * Note: rm_xid/rm_vers were already byte-swapped to host order by
 * decode_req, so htonl here restores wire order.
 */
313int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
314 struct rpcrdma_msg *rmsgp,
315 enum rpcrdma_errcode err, u32 *va)
316{
317 u32 *startp = va;
318
319 *va++ = htonl(rmsgp->rm_xid);
320 *va++ = htonl(rmsgp->rm_vers);
321 *va++ = htonl(xprt->sc_max_requests);
322 *va++ = htonl(RDMA_ERROR);
323 *va++ = htonl(err);
324 if (err == ERR_VERS) {
325 *va++ = htonl(RPCRDMA_VERSION);
326 *va++ = htonl(RPCRDMA_VERSION);
327 }
328
329 return (int)((unsigned long)va - (unsigned long)startp);
330}
331
/*
 * Compute the length of an encoded reply header by skipping the
 * (possibly present) write list and reply array.  The header here is
 * still in wire order, hence the ntohl on wc_nchunks before indexing.
 * Replies carry no read list, so scanning starts at rm_chunks[1].
 */
332int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
333{
334 struct rpcrdma_write_array *wr_ary;
335
336 /* There is no read-list in a reply */
337
338 /* skip write list */
339 wr_ary = (struct rpcrdma_write_array *)
340 &rmsgp->rm_body.rm_chunks[1];
341 if (wr_ary->wc_discrim)
342 wr_ary = (struct rpcrdma_write_array *)
343 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
344 wc_target.rs_length;
345 else
346 wr_ary = (struct rpcrdma_write_array *)
347 &wr_ary->wc_nchunks;
348
349 /* skip reply array */
350 if (wr_ary->wc_discrim)
351 wr_ary = (struct rpcrdma_write_array *)
352 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
353 else
354 wr_ary = (struct rpcrdma_write_array *)
355 &wr_ary->wc_nchunks;
356
357 return (unsigned long) wr_ary - (unsigned long) rmsgp;
358}
359
/*
 * Encode an empty read list plus a write array of 'chunks' entries
 * into a reply header.  The terminator word written at
 * wc_array[chunks] does double duty: rs_handle terminates the write
 * list and rs_length becomes the reply-array discriminator.
 */
360void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
361{
362 struct rpcrdma_write_array *ary;
363
364 /* no read-list */
365 rmsgp->rm_body.rm_chunks[0] = xdr_zero;
366
367 /* write-array discrim */
368 ary = (struct rpcrdma_write_array *)
369 &rmsgp->rm_body.rm_chunks[1];
370 ary->wc_discrim = xdr_one;
371 ary->wc_nchunks = htonl(chunks);
372
373 /* write-list terminator */
374 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
375
376 /* reply-array discriminator */
377 ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
378}
379
/*
 * Stamp a reply array header: present-discriminator plus the chunk
 * count in network byte order.  Entries are filled in separately via
 * svc_rdma_xdr_encode_array_chunk().
 */
380void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
381 int chunks)
382{
383 ary->wc_discrim = xdr_one;
384 ary->wc_nchunks = htonl(chunks);
385}
386
/*
 * Encode one segment of a write/reply array: handle and actual
 * written length in network order, and the 64-bit offset via
 * xdr_encode_hyper (handles the potentially unaligned rs_offset).
 */
387void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
388 int chunk_no,
389 u32 rs_handle, u64 rs_offset,
390 u32 write_len)
391{
392 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
393 seg->rs_handle = htonl(rs_handle);
394 seg->rs_length = htonl(write_len);
395 xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset);
396}
397
/*
 * Fill in the fixed part of a reply header, echoing xid/version from
 * the (already host-order) request header, advertising our credit
 * limit, and writing three zero discriminators for empty read/write/
 * reply chunk lists.
 */
398void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
399 struct rpcrdma_msg *rdma_argp,
400 struct rpcrdma_msg *rdma_resp,
401 enum rpcrdma_proc rdma_type)
402{
403 rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
404 rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
405 rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
406 rdma_resp->rm_type = htonl(rdma_type);
407
408 /* Encode <nul> chunks lists */
409 rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
410 rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
411 rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
412}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
new file mode 100644
index 000000000000..ab54a736486e
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -0,0 +1,586 @@
1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41
42#include <linux/sunrpc/debug.h>
43#include <linux/sunrpc/rpc_rdma.h>
44#include <linux/spinlock.h>
45#include <asm/unaligned.h>
46#include <rdma/ib_verbs.h>
47#include <rdma/rdma_cm.h>
48#include <linux/sunrpc/svc_rdma.h>
49
50#define RPCDBG_FACILITY RPCDBG_SVCXPRT
51
52/*
53 * Replace the pages in the rq_argpages array with the pages from the SGE in
54 * the RDMA_RECV completion. The SGL should contain full pages up until the
55 * last one.
56 */
/*
 * Replace the pages in the rq_argpages array with the pages from the SGE in
 * the RDMA_RECV completion. The SGL should contain full pages up until the
 * last one.
 * Ownership of the ctxt pages up to ctxt->count transfers into
 * rqstp->rq_pages (the previous rq pages are released); any leftover
 * ctxt pages beyond byte_count are freed here and ctxt->count is
 * trimmed accordingly.
 */
57static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
58 struct svc_rdma_op_ctxt *ctxt,
59 u32 byte_count)
60{
61 struct page *page;
62 u32 bc;
63 int sge_no;
64
65 /* Swap the page in the SGE with the page in argpages */
66 page = ctxt->pages[0];
67 put_page(rqstp->rq_pages[0]);
68 rqstp->rq_pages[0] = page;
69
70 /* Set up the XDR head */
71 rqstp->rq_arg.head[0].iov_base = page_address(page);
72 rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
73 rqstp->rq_arg.len = byte_count;
74 rqstp->rq_arg.buflen = byte_count;
75
76 /* Compute bytes past head in the SGL */
77 bc = byte_count - rqstp->rq_arg.head[0].iov_len;
78
79 /* If data remains, store it in the pagelist */
80 rqstp->rq_arg.page_len = bc;
81 rqstp->rq_arg.page_base = 0;
82 rqstp->rq_arg.pages = &rqstp->rq_pages[1];
83 sge_no = 1;
84 while (bc && sge_no < ctxt->count) {
85 page = ctxt->pages[sge_no];
86 put_page(rqstp->rq_pages[sge_no]);
87 rqstp->rq_pages[sge_no] = page;
88 bc -= min(bc, ctxt->sge[sge_no].length);
89 rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
90 sge_no++;
91 }
92 rqstp->rq_respages = &rqstp->rq_pages[sge_no];
93
94 /* We should never run out of SGE because the limit is defined to
95 * support the max allowed RPC data length
96 */
97 BUG_ON(bc && (sge_no == ctxt->count));
98 BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
99 != byte_count);
100 BUG_ON(rqstp->rq_arg.len != byte_count);
101
102 /* If not all pages were used from the SGL, free the remaining ones */
103 bc = sge_no;
104 while (sge_no < ctxt->count) {
105 page = ctxt->pages[sge_no++];
106 put_page(page);
107 }
108 ctxt->count = bc;
109
110 /* Set up tail */
111 rqstp->rq_arg.tail[0].iov_base = NULL;
112 rqstp->rq_arg.tail[0].iov_len = 0;
113}
114
/* Maps one read chunk onto a contiguous run of entries in an ib_sge
 * array; stored temporarily in a borrowed ctxt's sge space (see the
 * sizeof BUG_ON in rdma_read_xdr). */
115struct chunk_sge {
116 int start; /* sge no for this chunk */
117 int count; /* sge count for this chunk */
118};
119
120/* Encode a read-chunk-list as an array of IB SGE
121 *
122 * Assumptions:
123 * - chunk[0]->position points to pages[0] at an offset of 0
124 * - pages[] is not physically or virtually contigous and consists of
125 * PAGE_SIZE elements.
126 *
127 * Output:
128 * - sge array pointing into pages[] array.
129 * - chunk_sge array specifying sge index and count for each
130 * chunk in the read list
131 *
132 */
/*
 * Translate the request's read-chunk list into an ib_sge array
 * backed by rqstp->rq_arg.pages, DMA-mapping each fragment for
 * DMA_FROM_DEVICE, and record per-chunk {start,count} spans in
 * ch_sge_ary.  Also accounts the incoming chunk bytes into
 * head->arg (len/buflen/page_len) and head->count, and advances
 * rqstp->rq_respages past the pages consumed.  Returns the total
 * number of SGEs built.
 * NOTE(review): head->sge[0].length is reused as scratch storage for
 * the count of header pages — intentional but surprising; see
 * rdma_read_complete which reads it back.
 */
133static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
134 struct svc_rqst *rqstp,
135 struct svc_rdma_op_ctxt *head,
136 struct rpcrdma_msg *rmsgp,
137 struct ib_sge *sge,
138 struct chunk_sge *ch_sge_ary,
139 int ch_count,
140 int byte_count)
141{
142 int sge_no;
143 int sge_bytes;
144 int page_off;
145 int page_no;
146 int ch_bytes;
147 int ch_no;
148 struct rpcrdma_read_chunk *ch;
149
150 sge_no = 0;
151 page_no = 0;
152 page_off = 0;
153 ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
154 ch_no = 0;
155 ch_bytes = ch->rc_target.rs_length;
156 head->arg.head[0] = rqstp->rq_arg.head[0];
157 head->arg.tail[0] = rqstp->rq_arg.tail[0];
158 head->arg.pages = &head->pages[head->count];
159 head->sge[0].length = head->count; /* save count of hdr pages */
160 head->arg.page_base = 0;
161 head->arg.page_len = ch_bytes;
162 head->arg.len = rqstp->rq_arg.len + ch_bytes;
163 head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
164 head->count++;
165 ch_sge_ary[0].start = 0;
166 while (byte_count) {
167 sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
168 sge[sge_no].addr =
169 ib_dma_map_page(xprt->sc_cm_id->device,
170 rqstp->rq_arg.pages[page_no],
171 page_off, sge_bytes,
172 DMA_FROM_DEVICE);
 /* NOTE(review): the ib_dma_map_page return is not checked for
  * a mapping error here — verify against ib_dma_mapping_error
  * expectations for this kernel version. */
173 sge[sge_no].length = sge_bytes;
174 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
175 /*
176 * Don't bump head->count here because the same page
177 * may be used by multiple SGE.
178 */
179 head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
180 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
181
182 byte_count -= sge_bytes;
183 ch_bytes -= sge_bytes;
184 sge_no++;
185 /*
186 * If all bytes for this chunk have been mapped to an
187 * SGE, move to the next SGE
188 */
189 if (ch_bytes == 0) {
190 ch_sge_ary[ch_no].count =
191 sge_no - ch_sge_ary[ch_no].start;
192 ch_no++;
193 ch++;
194 ch_sge_ary[ch_no].start = sge_no;
195 ch_bytes = ch->rc_target.rs_length;
196 /* If bytes remaining account for next chunk */
197 if (byte_count) {
198 head->arg.page_len += ch_bytes;
199 head->arg.len += ch_bytes;
200 head->arg.buflen += ch_bytes;
201 }
202 }
203 /*
204 * If this SGE consumed all of the page, move to the
205 * next page
206 */
207 if ((sge_bytes + page_off) == PAGE_SIZE) {
208 page_no++;
209 page_off = 0;
210 /*
211 * If there are still bytes left to map, bump
212 * the page count
213 */
214 if (byte_count)
215 head->count++;
216 } else
217 page_off += sge_bytes;
218 }
219 BUG_ON(byte_count != 0);
220 return sge_no;
221}
222
/*
 * Copy 'count' addr/length pairs from sge[] into the ctxt's own SGE
 * array (lkey is not copied here) and advance *sgl_offset by the
 * total bytes covered, so the next RDMA_READ resumes at the right
 * remote offset.
 */
223static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
224 struct ib_sge *sge,
225 u64 *sgl_offset,
226 int count)
227{
228 int i;
229
230 ctxt->count = count;
231 for (i = 0; i < count; i++) {
232 ctxt->sge[i].addr = sge[i].addr;
233 ctxt->sge[i].length = sge[i].length;
234 *sgl_offset = *sgl_offset + sge[i].length;
235 }
236}
237
/*
 * Cap the SGE count for one RDMA_READ work request.  iWARP devices
 * are limited to a single SGE per RDMA_READ, so multi-SGE chunks are
 * forced to 1 and the caller loops (see the next_sge retry in
 * rdma_read_xdr); otherwise the device's sc_max_sge applies.
 * The #ifdef guards builds where RDMA_TRANSPORT_IWARP is undefined.
 */
238static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
239{
240#ifdef RDMA_TRANSPORT_IWARP
241 if ((RDMA_TRANSPORT_IWARP ==
242 rdma_node_get_transport(xprt->sc_cm_id->
243 device->node_type))
244 && sge_count > 1)
245 return 1;
246 else
247#endif
248 return min_t(int, sge_count, xprt->sc_max_sge);
249}
250
251/*
252 * Use RDMA_READ to read data from the advertised client buffer into the
253 * XDR stream starting at rq_arg.head[0].iov_base.
254 * Each chunk in the array
255 * contains the following fields:
256 * discrim - '1', This isn't used for data placement
257 * position - The xdr stream offset (the same for every chunk)
258 * handle - RMR for client memory region
259 * length - data transfer length
260 * offset - 64 bit tagged offset in remote memory region
261 *
262 * On our side, we need to read into a pagelist. The first page immediately
263 * follows the RPC header.
264 *
265 * This function returns 1 to indicate success. The data is not yet in
266 * the pagelist and therefore the RPC request must be deferred. The
267 * I/O completion will enqueue the transport again and
268 * svc_rdma_recvfrom will complete the request.
269 *
270 * NOTE: The ctxt must not be touched after the last WR has been posted
271 * because the I/O completion processing may occur on another
272 * processor and free / modify the context. Ne touche pas!
273 */
/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 * Each chunk in the array
 * contains the following fields:
 * discrim - '1', This isn't used for data placement
 * position - The xdr stream offset (the same for every chunk)
 * handle - RMR for client memory region
 * length - data transfer length
 * offset - 64 bit tagged offset in remote memory region
 *
 * On our side, we need to read into a pagelist. The first page immediately
 * follows the RPC header.
 *
 * This function returns 1 to indicate success. The data is not yet in
 * the pagelist and therefore the RPC request must be deferred. The
 * I/O completion will enqueue the transport again and
 * svc_rdma_recvfrom will complete the request.
 * Returns 0 when there is no read list (caller proceeds inline) and
 * also 0 after a posting error (transport is marked XPT_CLOSE and the
 * context chain is freed).
 *
 * NOTE: The ctxt must not be touched after the last WR has been posted
 * because the I/O completion processing may occur on another
 * processor and free / modify the context. Ne touche pas!
 */
274static int rdma_read_xdr(struct svcxprt_rdma *xprt,
275 struct rpcrdma_msg *rmsgp,
276 struct svc_rqst *rqstp,
277 struct svc_rdma_op_ctxt *hdr_ctxt)
278{
279 struct ib_send_wr read_wr;
280 int err = 0;
281 int ch_no;
282 struct ib_sge *sge;
283 int ch_count;
284 int byte_count;
285 int sge_count;
286 u64 sgl_offset;
287 struct rpcrdma_read_chunk *ch;
288 struct svc_rdma_op_ctxt *ctxt = NULL;
289 struct svc_rdma_op_ctxt *head;
290 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
291 struct svc_rdma_op_ctxt *tmp_ch_ctxt;
292 struct chunk_sge *ch_sge_ary;
293
294 /* If no read list is present, return 0 */
295 ch = svc_rdma_get_read_chunk(rmsgp);
296 if (!ch)
297 return 0;
298
299 /* Allocate temporary contexts to keep SGE */
 /* The BUG_ON proves a ctxt's sge array can be reused as a
  * chunk_sge array without overrunning it. */
300 BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
301 tmp_sge_ctxt = svc_rdma_get_context(xprt);
302 sge = tmp_sge_ctxt->sge;
303 tmp_ch_ctxt = svc_rdma_get_context(xprt);
304 ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
305
306 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
307 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
308 sge, ch_sge_ary,
309 ch_count, byte_count);
310 head = svc_rdma_get_context(xprt);
311 sgl_offset = 0;
312 ch_no = 0;
313
 /* One WR per chunk; iWARP may need several WRs per chunk, handled
  * by the next_sge label below. */
314 for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
315 ch->rc_discrim != 0; ch++, ch_no++) {
316next_sge:
317 if (!ctxt)
318 ctxt = head;
319 else {
320 ctxt->next = svc_rdma_get_context(xprt);
321 ctxt = ctxt->next;
322 }
323 ctxt->next = NULL;
324 ctxt->direction = DMA_FROM_DEVICE;
325 clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
326 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
327 if ((ch+1)->rc_discrim == 0) {
328 /*
329 * Checked in sq_cq_reap to see if we need to
330 * be enqueued
331 */
 /* Close the chain into a ring: last->hdr_ctxt->head, so
  * completion processing can find everything from the last
  * WR.  Broken below on error, and by rdma_read_complete. */
332 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
333 ctxt->next = hdr_ctxt;
334 hdr_ctxt->next = head;
335 }
336
337 /* Prepare READ WR */
338 memset(&read_wr, 0, sizeof read_wr);
339 ctxt->wr_op = IB_WR_RDMA_READ;
340 read_wr.wr_id = (unsigned long)ctxt;
341 read_wr.opcode = IB_WR_RDMA_READ;
342 read_wr.send_flags = IB_SEND_SIGNALED;
343 read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
344 read_wr.wr.rdma.remote_addr =
345 get_unaligned(&(ch->rc_target.rs_offset)) +
346 sgl_offset;
347 read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
348 read_wr.num_sge =
349 rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
350 rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
351 &sgl_offset,
352 read_wr.num_sge);
353
354 /* Post the read */
355 err = svc_rdma_send(xprt, &read_wr);
356 if (err) {
357 printk(KERN_ERR "svcrdma: Error posting send = %d\n",
358 err);
359 /*
360 * Break the circular list so free knows when
361 * to stop if the error happened to occur on
362 * the last read
363 */
364 ctxt->next = NULL;
365 goto out;
366 }
367 atomic_inc(&rdma_stat_read);
368
 /* Device could not take the whole chunk in one WR; advance
  * the chunk's sge window and post another WR for it. */
369 if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
370 ch_sge_ary[ch_no].count -= read_wr.num_sge;
371 ch_sge_ary[ch_no].start += read_wr.num_sge;
372 goto next_sge;
373 }
374 sgl_offset = 0;
375 err = 0;
376 }
377
378 out:
379 svc_rdma_put_context(tmp_sge_ctxt, 0);
380 svc_rdma_put_context(tmp_ch_ctxt, 0);
381
382 /* Detach arg pages. svc_recv will replenish them */
383 for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
384 rqstp->rq_pages[ch_no] = NULL;
385
386 /*
387 * Detach res pages. svc_release must see a resused count of
388 * zero or it will attempt to put them.
389 */
390 while (rqstp->rq_resused)
391 rqstp->rq_respages[--rqstp->rq_resused] = NULL;
392
393 if (err) {
394 printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
395 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
396 /* Free the linked list of read contexts */
397 while (head != NULL) {
398 ctxt = head->next;
399 svc_rdma_put_context(head, 1);
400 head = ctxt;
401 }
402 return 0;
403 }
404
405 return 1;
406}
407
/*
 * Finish a request whose read-list data has arrived: splice the pages
 * gathered by the RDMA_READs into rqstp, rebuild rq_arg from the
 * saved head->arg, free the ctxt chain built by rdma_read_xdr, and
 * return the total argument length.  'data' is the hdr_ctxt that
 * links (via ->next) to the head of the read-context ring.
 */
408static int rdma_read_complete(struct svc_rqst *rqstp,
409 struct svc_rdma_op_ctxt *data)
410{
411 struct svc_rdma_op_ctxt *head = data->next;
412 int page_no;
413 int ret;
414
415 BUG_ON(!head);
416
417 /* Copy RPC pages */
418 for (page_no = 0; page_no < head->count; page_no++) {
419 put_page(rqstp->rq_pages[page_no]);
420 rqstp->rq_pages[page_no] = head->pages[page_no];
421 }
422 /* Point rq_arg.pages past header */
 /* head->sge[0].length holds the header page count stashed by
  * rdma_rcl_to_sge. */
423 rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
424 rqstp->rq_arg.page_len = head->arg.page_len;
425 rqstp->rq_arg.page_base = head->arg.page_base;
426
427 /* rq_respages starts after the last arg page */
428 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
429 rqstp->rq_resused = 0;
430
431 /* Rebuild rq_arg head and tail. */
432 rqstp->rq_arg.head[0] = head->arg.head[0];
433 rqstp->rq_arg.tail[0] = head->arg.tail[0];
434 rqstp->rq_arg.len = head->arg.len;
435 rqstp->rq_arg.buflen = head->arg.buflen;
436
437 /* XXX: What should this be? */
438 rqstp->rq_prot = IPPROTO_MAX;
439
440 /*
441 * Free the contexts we used to build the RDMA_READ. We have
442 * to be careful here because the context list uses the same
443 * next pointer used to chain the contexts associated with the
444 * RDMA_READ
445 */
446 data->next = NULL; /* terminate circular list */
447 do {
448 data = head->next;
449 svc_rdma_put_context(head, 0);
450 head = data;
451 } while (head != NULL);
452
453 ret = rqstp->rq_arg.head[0].iov_len
454 + rqstp->rq_arg.page_len
455 + rqstp->rq_arg.tail[0].iov_len;
456 dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
457 "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
458 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
459 rqstp->rq_arg.head[0].iov_len);
460
461 /* Indicate that we've consumed an RQ credit */
462 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
463 svc_xprt_received(rqstp->rq_xprt);
464 return ret;
465}
466
467/*
468 * Set up the rqstp thread context to point to the RQ buffer. If
469 * necessary, pull additional data from the client with an RDMA_READ
470 * request.
471 */
/*
 * Set up the rqstp thread context to point to the RQ buffer. If
 * necessary, pull additional data from the client with an RDMA_READ
 * request.
 * Return value is the number of argument bytes ready for the RPC
 * layer, 0 when the request was deferred (read-list in flight) or the
 * transport is closing; -EAGAIN semantics come from the out: path
 * with ret == 0 when no DTO context is queued.
 */
472int svc_rdma_recvfrom(struct svc_rqst *rqstp)
473{
474 struct svc_xprt *xprt = rqstp->rq_xprt;
475 struct svcxprt_rdma *rdma_xprt =
476 container_of(xprt, struct svcxprt_rdma, sc_xprt);
477 struct svc_rdma_op_ctxt *ctxt = NULL;
478 struct rpcrdma_msg *rmsgp;
479 int ret = 0;
480 int len;
481
482 dprintk("svcrdma: rqstp=%p\n", rqstp);
483
484 /*
485 * The rq_xprt_ctxt indicates if we've consumed an RQ credit
486 * or not. It is used in the rdma xpo_release_rqst function to
487 * determine whether or not to return an RQ WQE to the RQ.
488 */
489 rqstp->rq_xprt_ctxt = NULL;
490
 /* Completed deferred reads take priority over fresh receives. */
491 spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
492 if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
493 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
494 struct svc_rdma_op_ctxt,
495 dto_q);
496 list_del_init(&ctxt->dto_q);
497 }
498 spin_unlock_bh(&rdma_xprt->sc_read_complete_lock);
499 if (ctxt)
500 return rdma_read_complete(rqstp, ctxt);
501
502 spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
503 if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
504 ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
505 struct svc_rdma_op_ctxt,
506 dto_q);
507 list_del_init(&ctxt->dto_q);
508 } else {
509 atomic_inc(&rdma_stat_rq_starve);
510 clear_bit(XPT_DATA, &xprt->xpt_flags);
511 ctxt = NULL;
512 }
513 spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
514 if (!ctxt) {
515 /* This is the EAGAIN path. The svc_recv routine will
516 * return -EAGAIN, the nfsd thread will go to call into
517 * svc_recv again and we shouldn't be on the active
518 * transport list
519 */
520 if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
521 goto close_out;
522
523 BUG_ON(ret);
524 goto out;
525 }
526 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
527 ctxt, rdma_xprt, rqstp, ctxt->wc_status);
528 BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
529 atomic_inc(&rdma_stat_recv);
530
531 /* Build up the XDR from the receive buffers. */
532 rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
533
534 /* Decode the RDMA header. */
535 len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
536 rqstp->rq_xprt_hlen = len;
537
538 /* If the request is invalid, reply with an error */
539 if (len < 0) {
540 if (len == -ENOSYS)
541 (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
542 goto close_out;
543 }
544
545 /* Read read-list data. If we would need to wait, defer
546 * it. Not that in this case, we don't return the RQ credit
547 * until after the read completes.
548 */
549 if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
550 svc_xprt_received(xprt);
551 return 0;
552 }
553
554 /* Indicate we've consumed an RQ credit */
555 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
556
557 ret = rqstp->rq_arg.head[0].iov_len
558 + rqstp->rq_arg.page_len
559 + rqstp->rq_arg.tail[0].iov_len;
560 svc_rdma_put_context(ctxt, 0);
561 out:
562 dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
563 "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
564 ret, rqstp->rq_arg.len,
565 rqstp->rq_arg.head[0].iov_base,
566 rqstp->rq_arg.head[0].iov_len);
567 rqstp->rq_prot = IPPROTO_MAX;
568 svc_xprt_copy_addrs(rqstp, xprt);
569 svc_xprt_received(xprt);
570 return ret;
571
572 close_out:
573 if (ctxt) {
574 svc_rdma_put_context(ctxt, 1);
575 /* Indicate we've consumed an RQ credit */
576 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
577 }
578 dprintk("svcrdma: transport %p is closing\n", xprt);
579 /*
580 * Set the close bit and enqueue it. svc_recv will see the
581 * close bit and call svc_xprt_delete
582 */
583 set_bit(XPT_CLOSE, &xprt->xpt_flags);
584 svc_xprt_received(xprt);
585 return 0;
586}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
new file mode 100644
index 000000000000..3e321949e1dc
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -0,0 +1,520 @@
1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41
42#include <linux/sunrpc/debug.h>
43#include <linux/sunrpc/rpc_rdma.h>
44#include <linux/spinlock.h>
45#include <asm/unaligned.h>
46#include <rdma/ib_verbs.h>
47#include <rdma/rdma_cm.h>
48#include <linux/sunrpc/svc_rdma.h>
49
50#define RPCDBG_FACILITY RPCDBG_SVCXPRT
51
52/* Encode an XDR as an array of IB SGE
53 *
54 * Assumptions:
55 * - head[0] is physically contiguous.
56 * - tail[0] is physically contiguous.
57 * - pages[] is not physically or virtually contigous and consists of
58 * PAGE_SIZE elements.
59 *
60 * Output:
61 * SGE[0] reserved for RCPRDMA header
62 * SGE[1] data from xdr->head[]
63 * SGE[2..sge_count-2] data from xdr->pages[]
64 * SGE[sge_count-1] data from xdr->tail.
65 *
66 */
/*
 * DMA-map an outgoing xdr_buf (head, pages, tail) into sge[1..],
 * leaving sge[0] for the RPCRDMA header the caller fills in.  All
 * entries use the transport's phys MR lkey and DMA_TO_DEVICE.
 * *sge_count is set to the number of entries used (including the
 * reserved slot 0).  Returns sge for convenience.
 * NOTE(review): ib_dma_map_single/map_page returns are not checked
 * for mapping errors — consistent with the rest of this file.
 */
67static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
68 struct xdr_buf *xdr,
69 struct ib_sge *sge,
70 int *sge_count)
71{
72 /* Max we need is the length of the XDR / pagesize + one for
73 * head + one for tail + one for RPCRDMA header
74 */
75 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
76 int sge_no;
77 u32 byte_count = xdr->len;
78 u32 sge_bytes;
79 u32 page_bytes;
80 int page_off;
81 int page_no;
82
83 /* Skip the first sge, this is for the RPCRDMA header */
84 sge_no = 1;
85
86 /* Head SGE */
87 sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
88 xdr->head[0].iov_base,
89 xdr->head[0].iov_len,
90 DMA_TO_DEVICE);
91 sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
92 byte_count -= sge_bytes;
93 sge[sge_no].length = sge_bytes;
94 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
95 sge_no++;
96
97 /* pages SGE */
98 page_no = 0;
99 page_bytes = xdr->page_len;
100 page_off = xdr->page_base;
101 while (byte_count && page_bytes) {
102 sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
103 sge[sge_no].addr =
104 ib_dma_map_page(xprt->sc_cm_id->device,
105 xdr->pages[page_no], page_off,
106 sge_bytes, DMA_TO_DEVICE);
107 sge_bytes = min(sge_bytes, page_bytes);
108 byte_count -= sge_bytes;
109 page_bytes -= sge_bytes;
110 sge[sge_no].length = sge_bytes;
111 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
112
113 sge_no++;
114 page_no++;
115 page_off = 0; /* reset for next time through loop */
116 }
117
118 /* Tail SGE */
119 if (byte_count && xdr->tail[0].iov_len) {
120 sge[sge_no].addr =
121 ib_dma_map_single(xprt->sc_cm_id->device,
122 xdr->tail[0].iov_base,
123 xdr->tail[0].iov_len,
124 DMA_TO_DEVICE);
125 sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
126 byte_count -= sge_bytes;
127 sge[sge_no].length = sge_bytes;
128 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
129 sge_no++;
130 }
131
132 BUG_ON(sge_no > sge_max);
133 BUG_ON(byte_count != 0);
134
135 *sge_count = sge_no;
136 return sge;
137}
138
139
140/* Assumptions:
141 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
142 */
/* Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 *
 * Issue one RDMA_WRITE of write_len bytes, starting xdr_off bytes
 * into the already-mapped xdr_sge array (slot 0 is the RPCRDMA
 * header, hence scanning starts at index 1), targeting remote
 * (rmr, to).  Returns 0 on success or -EIO if the post fails, in
 * which case the caller must close the transport.
 */
143static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
144 u32 rmr, u64 to,
145 u32 xdr_off, int write_len,
146 struct ib_sge *xdr_sge, int sge_count)
147{
148 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
149 struct ib_send_wr write_wr;
150 struct ib_sge *sge;
151 int xdr_sge_no;
152 int sge_no;
153 int sge_bytes;
154 int sge_off;
155 int bc;
156 struct svc_rdma_op_ctxt *ctxt;
157 int ret = 0;
158
159 BUG_ON(sge_count >= 32);
160 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
161 "write_len=%d, xdr_sge=%p, sge_count=%d\n",
162 rmr, to, xdr_off, write_len, xdr_sge, sge_count);
163
164 ctxt = svc_rdma_get_context(xprt);
165 ctxt->count = 0;
166 tmp_sge_ctxt = svc_rdma_get_context(xprt);
167 sge = tmp_sge_ctxt->sge;
168
169 /* Find the SGE associated with xdr_off */
170 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
171 xdr_sge_no++) {
172 if (xdr_sge[xdr_sge_no].length > bc)
173 break;
174 bc -= xdr_sge[xdr_sge_no].length;
175 }
176
 /* bc now holds the byte offset into the first SGE to send from. */
177 sge_off = bc;
178 bc = write_len;
179 sge_no = 0;
180
181 /* Copy the remaining SGE */
182 while (bc != 0 && xdr_sge_no < sge_count) {
183 sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
184 sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
185 sge_bytes = min((size_t)bc,
186 (size_t)(xdr_sge[xdr_sge_no].length-sge_off));
187 sge[sge_no].length = sge_bytes;
188
189 sge_off = 0;
190 sge_no++;
191 xdr_sge_no++;
192 bc -= sge_bytes;
193 }
194
195 BUG_ON(bc != 0);
196 BUG_ON(xdr_sge_no > sge_count);
197
198 /* Prepare WRITE WR */
199 memset(&write_wr, 0, sizeof write_wr);
200 ctxt->wr_op = IB_WR_RDMA_WRITE;
201 write_wr.wr_id = (unsigned long)ctxt;
202 write_wr.sg_list = &sge[0];
203 write_wr.num_sge = sge_no;
204 write_wr.opcode = IB_WR_RDMA_WRITE;
205 write_wr.send_flags = IB_SEND_SIGNALED;
206 write_wr.wr.rdma.rkey = rmr;
207 write_wr.wr.rdma.remote_addr = to;
208
209 /* Post It */
210 atomic_inc(&rdma_stat_write);
211 if (svc_rdma_send(xprt, &write_wr)) {
212 svc_rdma_put_context(ctxt, 1);
213 /* Fatal error, close transport */
214 ret = -EIO;
215 }
216 svc_rdma_put_context(tmp_sge_ctxt, 0);
217 return ret;
218}
219
/* Send the reply payload described by the client's write-chunk list.
 *
 * For each chunk in the request's write list, RDMA-write up to
 * rs_length bytes of the response -- starting at the pagelist, i.e.
 * just past head[0] -- into the client-advertised segment, splitting
 * the transfer into sc_max_sge * PAGE_SIZE sized WRITEs.  Each chunk's
 * entry in the response's write list is encoded with the byte count
 * actually written.
 *
 * Returns the number of payload bytes moved by RDMA (page_len plus
 * tail length), 0 if the request carried no write list, or -EIO if a
 * WRITE failed.
 */
static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
	int write_len;
	int max_write;
	u32 xdr_off;
	int chunk_off;
	int chunk_no;
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_write_array(rdma_argp);
	if (!arg_ary)
		return 0;
	/* The response's write list is built in the second chunk-list
	 * slot of the reply header */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];

	/* Largest single WRITE send_write() can be asked for */
	max_write = xprt->sc_max_sge * PAGE_SIZE;

	/* Write chunks start at the pagelist */
	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
	     xfer_len && chunk_no < arg_ary->wc_nchunks;
	     chunk_no++) {
		struct rpcrdma_segment *arg_ch;
		u64 rs_offset;

		arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, arg_ch->rs_length);

		/* Prepare the response chunk given the length actually
		 * written; rs_offset in the wire header may be unaligned */
		rs_offset = get_unaligned(&(arg_ch->rs_offset));
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						arg_ch->rs_handle,
						rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			int this_write;
			this_write = min(write_len, max_write);
			ret = send_write(xprt, rqstp,
					 arg_ch->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			chunk_off += this_write;
			xdr_off += this_write;
			xfer_len -= this_write;
			write_len -= this_write;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
289
/* Send the entire reply via the client's reply-chunk list.
 *
 * Like send_write_chunks(), but the XDR offset starts at 0 (the whole
 * RPC message, head included, is pushed by RDMA) and the response's
 * reply array is encoded instead of its write list.
 *
 * Returns the full response length (rq_res.len) on success, 0 if the
 * request carried no reply array, or -EIO if a WRITE failed.
 */
static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	u32 xfer_len = rqstp->rq_res.len;
	int write_len;
	int max_write;
	u32 xdr_off;
	int chunk_no;
	int chunk_off;
	struct rpcrdma_segment *ch;
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_reply_array(rdma_argp);
	if (!arg_ary)
		return 0;
	/* XXX: need to fix when reply lists occur with read-list and or
	 * write-list */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];

	/* Largest single WRITE send_write() can be asked for */
	max_write = xprt->sc_max_sge * PAGE_SIZE;

	/* xdr offset starts at RPC message */
	for (xdr_off = 0, chunk_no = 0;
	     xfer_len && chunk_no < arg_ary->wc_nchunks;
	     chunk_no++) {
		u64 rs_offset;
		ch = &arg_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, ch->rs_length);


		/* Prepare the reply chunk given the length actually
		 * written; rs_offset in the wire header may be unaligned */
		rs_offset = get_unaligned(&(ch->rs_offset));
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						ch->rs_handle, rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			int this_write;

			this_write = min(write_len, max_write);
			ret = send_write(xprt, rqstp,
					 ch->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			chunk_off += this_write;
			xdr_off += this_write;
			xfer_len -= this_write;
			write_len -= this_write;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);

	return rqstp->rq_res.len;
}
361
/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
 * the XDR to include in this RDMA_SEND.
 *
 * @rdma:       the transport to send on
 * @rqstp:      the request whose response pages the context takes over
 * @page:       page holding the already-encoded RPCRDMA reply header
 * @rdma_resp:  the reply header (page_address(page))
 * @ctxt:       op context carrying the SGE list built by xdr_to_sge()
 * @sge_count:  number of valid SGEs in ctxt->sge
 * @byte_count: bytes of the XDR buffer to send inline
 *
 * Returns the svc_rdma_send() result; on failure the context is
 * released here.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      int sge_count,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	int sge_no;
	int sge_bytes;
	int page_no;
	int ret;

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].addr =
		ib_dma_map_page(rdma->sc_cm_id->device,
				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	ctxt->direction = DMA_TO_DEVICE;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;

	/* Determine how many of our SGE are to be transmitted */
	for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
		sge_bytes = min((size_t)ctxt->sge[sge_no].length,
				(size_t)byte_count);
		byte_count -= sge_bytes;
	}
	/* The SGE list must cover every inline byte exactly */
	BUG_ON(byte_count != 0);

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
	}

	BUG_ON(sge_no > rdma->sc_max_sge);
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags =  IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		svc_rdma_put_context(ctxt, 1);

	return ret;
}
436
/*
 * xpo_prep_reply_hdr method: a no-op for RDMA.  The RPCRDMA reply
 * header is built later, in svc_rdma_sendto(), so there is nothing to
 * prepend here.
 */
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
440
441/*
442 * Return the start of an xdr buffer.
443 */
444static void *xdr_start(struct xdr_buf *xdr)
445{
446 return xdr->head[0].iov_base -
447 (xdr->len -
448 xdr->page_len -
449 xdr->tail[0].iov_len -
450 xdr->head[0].iov_len);
451}
452
/*
 * xpo_sendto method: transmit the RPC reply held in rqstp->rq_res.
 *
 * Order of operations:
 *  1. Map the response XDR buffer into an SGE list (xdr_to_sge).
 *  2. Encode the RPCRDMA reply header: RDMA_NOMSG if the client
 *     supplied a reply array (whole message goes via RDMA), RDMA_MSG
 *     otherwise.
 *  3. RDMA-write any write-chunk data, then any reply-chunk data,
 *     deducting what was moved from the inline byte count.
 *  4. Send the header plus remaining inline bytes with send_reply().
 *
 * On a chunk-send failure the op context and the header page are
 * released and the error is returned.
 */
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *rdma_argp;
	struct rpcrdma_msg *rdma_resp;
	struct rpcrdma_write_array *reply_ary;
	enum rpcrdma_proc reply_type;
	int ret;
	int inline_bytes;
	struct ib_sge *sge;
	int sge_count = 0;
	struct page *res_page;
	struct svc_rdma_op_ctxt *ctxt;

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

	/* Get the RDMA request header. */
	rdma_argp = xdr_start(&rqstp->rq_arg);

	/* Build an SGE for the XDR */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
	sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);

	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
	res_page = svc_rdma_get_page();
	rdma_resp = page_address(res_page);
	reply_ary = svc_rdma_get_reply_array(rdma_argp);
	if (reply_ary)
		reply_type = RDMA_NOMSG;
	else
		reply_type = RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
					 rdma_resp, reply_type);

	/* Send any write-chunk data and build resp write-list */
	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
		       ret);
		goto error;
	}
	inline_bytes -= ret;

	/* Send any reply-list data and update resp reply-list */
	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
		       ret);
		goto error;
	}
	inline_bytes -= ret;

	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
			 inline_bytes);
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;
 error:
	svc_rdma_put_context(ctxt, 0);
	put_page(res_page);
	return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
new file mode 100644
index 000000000000..f09444c451bc
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -0,0 +1,1080 @@
1/*
2 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41
42#include <linux/sunrpc/svc_xprt.h>
43#include <linux/sunrpc/debug.h>
44#include <linux/sunrpc/rpc_rdma.h>
45#include <linux/spinlock.h>
46#include <rdma/ib_verbs.h>
47#include <rdma/rdma_cm.h>
48#include <linux/sunrpc/svc_rdma.h>
49
50#define RPCDBG_FACILITY RPCDBG_SVCXPRT
51
/* Forward declarations for the svc_xprt_ops methods and the deferred
 * completion machinery defined below. */
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
					struct sockaddr *sa, int salen,
					int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_release_rqst(struct svc_rqst *);
static void rdma_destroy_xprt(struct svcxprt_rdma *xprt);
static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);

/* Tasklet draining dto_xprt_q, the global list of transports with
 * pending completions; dto_lock serializes list access between the
 * tasklet and the interrupt-context CQ handlers.
 * NOTE(review): dto_tasklet has external linkage; it looks like it
 * could be static -- confirm no other file references it. */
DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
static DEFINE_SPINLOCK(dto_lock);
static LIST_HEAD(dto_xprt_q);
68
/* Method table wiring this RDMA transport into the generic svc_xprt
 * transport switch. */
static struct svc_xprt_ops svc_rdma_ops = {
	.xpo_create = svc_rdma_create,
	.xpo_recvfrom = svc_rdma_recvfrom,
	.xpo_sendto = svc_rdma_sendto,
	.xpo_release_rqst = svc_rdma_release_rqst,
	.xpo_detach = svc_rdma_detach,
	.xpo_free = svc_rdma_free,
	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
	.xpo_has_wspace = svc_rdma_has_wspace,
	.xpo_accept = svc_rdma_accept,
};
80
/* Transport class registered with the RPC server core under the name
 * "rdma"; the payload cap matches TCP's. */
struct svc_xprt_class svc_rdma_class = {
	.xcl_name = "rdma",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_rdma_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
87
/*
 * Try to grow the transport's op-context free list by sc_ctxt_bump
 * entries, capped at sc_ctxt_max.  Called when svc_rdma_get_context()
 * finds the list empty.
 *
 * Returns non-zero if at least one context was added, 0 if no memory
 * could be obtained.
 */
static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
{
	int target;
	int at_least_one = 0;
	struct svc_rdma_op_ctxt *ctxt;

	target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
		     xprt->sc_ctxt_max);

	spin_lock_bh(&xprt->sc_ctxt_lock);
	while (xprt->sc_ctxt_cnt < target) {
		/* Reserve the slot under the lock, then drop it for the
		 * (potentially sleeping) allocation */
		xprt->sc_ctxt_cnt++;
		spin_unlock_bh(&xprt->sc_ctxt_lock);

		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);

		spin_lock_bh(&xprt->sc_ctxt_lock);
		if (ctxt) {
			at_least_one = 1;
			ctxt->next = xprt->sc_ctxt_head;
			xprt->sc_ctxt_head = ctxt;
		} else {
			/* kmalloc failed...give up for now */
			xprt->sc_ctxt_cnt--;
			break;
		}
	}
	spin_unlock_bh(&xprt->sc_ctxt_lock);
	dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
		xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
	return at_least_one;
}
120
/*
 * Take an op context off the transport's free list.  If the list is
 * empty, try to grow the cache; if that also fails, sleep 500ms
 * (uninterruptibly) and retry.  Never returns NULL -- may block
 * indefinitely under memory pressure.
 */
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt;

	while (1) {
		spin_lock_bh(&xprt->sc_ctxt_lock);
		if (unlikely(xprt->sc_ctxt_head == NULL)) {
			/* Try to bump my cache. */
			spin_unlock_bh(&xprt->sc_ctxt_lock);

			if (rdma_bump_context_cache(xprt))
				continue;

			printk(KERN_INFO "svcrdma: sleeping waiting for "
			       "context memory on xprt=%p\n",
			       xprt);
			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
			continue;
		}
		/* Pop the head of the free list, then reset the per-op
		 * fields outside the lock */
		ctxt = xprt->sc_ctxt_head;
		xprt->sc_ctxt_head = ctxt->next;
		spin_unlock_bh(&xprt->sc_ctxt_lock);
		ctxt->xprt = xprt;
		INIT_LIST_HEAD(&ctxt->dto_q);
		ctxt->count = 0;
		break;
	}
	return ctxt;
}
150
151void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
152{
153 struct svcxprt_rdma *xprt;
154 int i;
155
156 BUG_ON(!ctxt);
157 xprt = ctxt->xprt;
158 if (free_pages)
159 for (i = 0; i < ctxt->count; i++)
160 put_page(ctxt->pages[i]);
161
162 for (i = 0; i < ctxt->count; i++)
163 dma_unmap_single(xprt->sc_cm_id->device->dma_device,
164 ctxt->sge[i].addr,
165 ctxt->sge[i].length,
166 ctxt->direction);
167 spin_lock_bh(&xprt->sc_ctxt_lock);
168 ctxt->next = xprt->sc_ctxt_head;
169 xprt->sc_ctxt_head = ctxt;
170 spin_unlock_bh(&xprt->sc_ctxt_lock);
171}
172
173/* ib_cq event handler */
174static void cq_event_handler(struct ib_event *event, void *context)
175{
176 struct svc_xprt *xprt = context;
177 dprintk("svcrdma: received CQ event id=%d, context=%p\n",
178 event->event, context);
179 set_bit(XPT_CLOSE, &xprt->xpt_flags);
180}
181
182/* QP event handler */
183static void qp_event_handler(struct ib_event *event, void *context)
184{
185 struct svc_xprt *xprt = context;
186
187 switch (event->event) {
188 /* These are considered benign events */
189 case IB_EVENT_PATH_MIG:
190 case IB_EVENT_COMM_EST:
191 case IB_EVENT_SQ_DRAINED:
192 case IB_EVENT_QP_LAST_WQE_REACHED:
193 dprintk("svcrdma: QP event %d received for QP=%p\n",
194 event->event, event->element.qp);
195 break;
196 /* These are considered fatal events */
197 case IB_EVENT_PATH_MIG_ERR:
198 case IB_EVENT_QP_FATAL:
199 case IB_EVENT_QP_REQ_ERR:
200 case IB_EVENT_QP_ACCESS_ERR:
201 case IB_EVENT_DEVICE_FATAL:
202 default:
203 dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
204 "closing transport\n",
205 event->event, event->element.qp);
206 set_bit(XPT_CLOSE, &xprt->xpt_flags);
207 break;
208 }
209}
210
211/*
212 * Data Transfer Operation Tasklet
213 *
214 * Walks a list of transports with I/O pending, removing entries as
215 * they are added to the server's I/O pending list. Two bits indicate
216 * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave
217 * spinlock that serializes access to the transport list with the RQ
218 * and SQ interrupt handlers.
219 */
220static void dto_tasklet_func(unsigned long data)
221{
222 struct svcxprt_rdma *xprt;
223 unsigned long flags;
224
225 spin_lock_irqsave(&dto_lock, flags);
226 while (!list_empty(&dto_xprt_q)) {
227 xprt = list_entry(dto_xprt_q.next,
228 struct svcxprt_rdma, sc_dto_q);
229 list_del_init(&xprt->sc_dto_q);
230 spin_unlock_irqrestore(&dto_lock, flags);
231
232 if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
233 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
234 rq_cq_reap(xprt);
235 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
236 /*
237 * If data arrived before established event,
238 * don't enqueue. This defers RPC I/O until the
239 * RDMA connection is complete.
240 */
241 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
242 svc_xprt_enqueue(&xprt->sc_xprt);
243 }
244
245 if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
246 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
247 sq_cq_reap(xprt);
248 }
249
250 spin_lock_irqsave(&dto_lock, flags);
251 }
252 spin_unlock_irqrestore(&dto_lock, flags);
253}
254
255/*
256 * Receive Queue Completion Handler
257 *
258 * Since an RQ completion handler is called on interrupt context, we
259 * need to defer the handling of the I/O to a tasklet
260 */
261static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
262{
263 struct svcxprt_rdma *xprt = cq_context;
264 unsigned long flags;
265
266 /*
267 * Set the bit regardless of whether or not it's on the list
268 * because it may be on the list already due to an SQ
269 * completion.
270 */
271 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
272
273 /*
274 * If this transport is not already on the DTO transport queue,
275 * add it
276 */
277 spin_lock_irqsave(&dto_lock, flags);
278 if (list_empty(&xprt->sc_dto_q))
279 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
280 spin_unlock_irqrestore(&dto_lock, flags);
281
282 /* Tasklet does all the work to avoid irqsave locks. */
283 tasklet_schedule(&dto_tasklet);
284}
285
286/*
287 * rq_cq_reap - Process the RQ CQ.
288 *
289 * Take all completing WC off the CQE and enqueue the associated DTO
290 * context on the dto_q for the transport.
291 */
292static void rq_cq_reap(struct svcxprt_rdma *xprt)
293{
294 int ret;
295 struct ib_wc wc;
296 struct svc_rdma_op_ctxt *ctxt = NULL;
297
298 atomic_inc(&rdma_stat_rq_poll);
299
300 spin_lock_bh(&xprt->sc_rq_dto_lock);
301 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
302 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
303 ctxt->wc_status = wc.status;
304 ctxt->byte_len = wc.byte_len;
305 if (wc.status != IB_WC_SUCCESS) {
306 /* Close the transport */
307 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
308 svc_rdma_put_context(ctxt, 1);
309 continue;
310 }
311 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
312 }
313 spin_unlock_bh(&xprt->sc_rq_dto_lock);
314
315 if (ctxt)
316 atomic_inc(&rdma_stat_rq_prod);
317}
318
319/*
320 * Send Queue Completion Handler - potentially called on interrupt context.
321 */
322static void sq_cq_reap(struct svcxprt_rdma *xprt)
323{
324 struct svc_rdma_op_ctxt *ctxt = NULL;
325 struct ib_wc wc;
326 struct ib_cq *cq = xprt->sc_sq_cq;
327 int ret;
328
329 atomic_inc(&rdma_stat_sq_poll);
330 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
331 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
332 xprt = ctxt->xprt;
333
334 if (wc.status != IB_WC_SUCCESS)
335 /* Close the transport */
336 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
337
338 /* Decrement used SQ WR count */
339 atomic_dec(&xprt->sc_sq_count);
340 wake_up(&xprt->sc_send_wait);
341
342 switch (ctxt->wr_op) {
343 case IB_WR_SEND:
344 case IB_WR_RDMA_WRITE:
345 svc_rdma_put_context(ctxt, 1);
346 break;
347
348 case IB_WR_RDMA_READ:
349 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
350 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
351 set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
352 spin_lock_bh(&xprt->sc_read_complete_lock);
353 list_add_tail(&ctxt->dto_q,
354 &xprt->sc_read_complete_q);
355 spin_unlock_bh(&xprt->sc_read_complete_lock);
356 svc_xprt_enqueue(&xprt->sc_xprt);
357 }
358 break;
359
360 default:
361 printk(KERN_ERR "svcrdma: unexpected completion type, "
362 "opcode=%d, status=%d\n",
363 wc.opcode, wc.status);
364 break;
365 }
366 }
367
368 if (ctxt)
369 atomic_inc(&rdma_stat_sq_prod);
370}
371
372static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
373{
374 struct svcxprt_rdma *xprt = cq_context;
375 unsigned long flags;
376
377 /*
378 * Set the bit regardless of whether or not it's on the list
379 * because it may be on the list already due to an RQ
380 * completion.
381 */
382 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
383
384 /*
385 * If this transport is not already on the DTO transport queue,
386 * add it
387 */
388 spin_lock_irqsave(&dto_lock, flags);
389 if (list_empty(&xprt->sc_dto_q))
390 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
391 spin_unlock_irqrestore(&dto_lock, flags);
392
393 /* Tasklet does all the work to avoid irqsave locks. */
394 tasklet_schedule(&dto_tasklet);
395}
396
397static void create_context_cache(struct svcxprt_rdma *xprt,
398 int ctxt_count, int ctxt_bump, int ctxt_max)
399{
400 struct svc_rdma_op_ctxt *ctxt;
401 int i;
402
403 xprt->sc_ctxt_max = ctxt_max;
404 xprt->sc_ctxt_bump = ctxt_bump;
405 xprt->sc_ctxt_cnt = 0;
406 xprt->sc_ctxt_head = NULL;
407 for (i = 0; i < ctxt_count; i++) {
408 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
409 if (ctxt) {
410 ctxt->next = xprt->sc_ctxt_head;
411 xprt->sc_ctxt_head = ctxt;
412 xprt->sc_ctxt_cnt++;
413 }
414 }
415}
416
417static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
418{
419 struct svc_rdma_op_ctxt *next;
420 if (!ctxt)
421 return;
422
423 do {
424 next = ctxt->next;
425 kfree(ctxt);
426 ctxt = next;
427 } while (next);
428}
429
/*
 * Allocate and initialize a svcxprt_rdma.
 *
 * @serv:     owning RPC service
 * @listener: non-zero for a listening endpoint; zero for a connection
 *            endpoint, in which case an op-context cache is
 *            pre-allocated
 *
 * Returns the new transport, or NULL on allocation failure (including
 * failure to allocate any context-cache entries for a connection
 * endpoint).
 */
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
					     int listener)
{
	struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);

	if (!cma_xprt)
		return NULL;
	svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
	init_waitqueue_head(&cma_xprt->sc_send_wait);

	spin_lock_init(&cma_xprt->sc_lock);
	spin_lock_init(&cma_xprt->sc_read_complete_lock);
	spin_lock_init(&cma_xprt->sc_ctxt_lock);
	spin_lock_init(&cma_xprt->sc_rq_dto_lock);

	cma_xprt->sc_ord = svcrdma_ord;

	/* Defaults from module parameters; a connection endpoint later
	 * clamps these to device capabilities in svc_rdma_accept() */
	cma_xprt->sc_max_req_size = svcrdma_max_req_size;
	cma_xprt->sc_max_requests = svcrdma_max_requests;
	cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
	atomic_set(&cma_xprt->sc_sq_count, 0);

	if (!listener) {
		int reqs = cma_xprt->sc_max_requests;
		create_context_cache(cma_xprt,
				     reqs << 1, /* starting size */
				     reqs, /* bump amount */
				     reqs +
				     cma_xprt->sc_sq_depth +
				     RPCRDMA_MAX_THREADS + 1); /* max */
		/* An empty cache means create_context_cache() got no
		 * memory at all */
		if (!cma_xprt->sc_ctxt_head) {
			kfree(cma_xprt);
			return NULL;
		}
		clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
	} else
		set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);

	return cma_xprt;
}
474
475struct page *svc_rdma_get_page(void)
476{
477 struct page *page;
478
479 while ((page = alloc_page(GFP_KERNEL)) == NULL) {
480 /* If we can't get memory, wait a bit and try again */
481 printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 "
482 "jiffies.\n");
483 schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
484 }
485 return page;
486}
487
/*
 * Build and post a single receive WR sized to hold the largest
 * acceptable request (sc_max_req_size), using one freshly allocated
 * page per SGE.  The pages are tracked in the op context so they can
 * be released or handed on when the receive completes.
 *
 * Returns 0 on success or the ib_post_recv() error code.
 */
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	unsigned long pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	/* Add page-sized SGEs until the maximum request size is covered */
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		BUG_ON(sge_no >= xprt->sc_max_sge);
		page = svc_rdma_get_page();
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
		buflen += PAGE_SIZE;
	}
	ctxt->count = sge_no;
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	/* wr_id carries the context; rq_cq_reap() recovers it on completion */
	recv_wr.wr_id = (u64)(unsigned long)ctxt;

	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	return ret;
}
522
523/*
524 * This function handles the CONNECT_REQUEST event on a listening
525 * endpoint. It is passed the cma_id for the _new_ connection. The context in
526 * this cma_id is inherited from the listening cma_id and is the svc_xprt
527 * structure for the listening endpoint.
528 *
529 * This function creates a new xprt for the new connection and enqueues it on
530 * the accept queue for the listent xprt. When the listen thread is kicked, it
531 * will call the recvfrom method on the listen xprt which will accept the new
532 * connection.
533 */
534static void handle_connect_req(struct rdma_cm_id *new_cma_id)
535{
536 struct svcxprt_rdma *listen_xprt = new_cma_id->context;
537 struct svcxprt_rdma *newxprt;
538
539 /* Create a new transport */
540 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
541 if (!newxprt) {
542 dprintk("svcrdma: failed to create new transport\n");
543 return;
544 }
545 newxprt->sc_cm_id = new_cma_id;
546 new_cma_id->context = newxprt;
547 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
548 newxprt, newxprt->sc_cm_id, listen_xprt);
549
550 /*
551 * Enqueue the new transport on the accept queue of the listening
552 * transport
553 */
554 spin_lock_bh(&listen_xprt->sc_lock);
555 list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
556 spin_unlock_bh(&listen_xprt->sc_lock);
557
558 /*
559 * Can't use svc_xprt_received here because we are not on a
560 * rqstp thread
561 */
562 set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
563 svc_xprt_enqueue(&listen_xprt->sc_xprt);
564}
565
566/*
567 * Handles events generated on the listening endpoint. These events will be
568 * either be incoming connect requests or adapter removal events.
569 */
570static int rdma_listen_handler(struct rdma_cm_id *cma_id,
571 struct rdma_cm_event *event)
572{
573 struct svcxprt_rdma *xprt = cma_id->context;
574 int ret = 0;
575
576 switch (event->event) {
577 case RDMA_CM_EVENT_CONNECT_REQUEST:
578 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
579 "event=%d\n", cma_id, cma_id->context, event->event);
580 handle_connect_req(cma_id);
581 break;
582
583 case RDMA_CM_EVENT_ESTABLISHED:
584 /* Accept complete */
585 dprintk("svcrdma: Connection completed on LISTEN xprt=%p, "
586 "cm_id=%p\n", xprt, cma_id);
587 break;
588
589 case RDMA_CM_EVENT_DEVICE_REMOVAL:
590 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
591 xprt, cma_id);
592 if (xprt)
593 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
594 break;
595
596 default:
597 dprintk("svcrdma: Unexpected event on listening endpoint %p, "
598 "event=%d\n", cma_id, event->event);
599 break;
600 }
601
602 return ret;
603}
604
/*
 * CMA event handler for connected (per-connection) endpoints:
 * completes the accept on ESTABLISHED, and flags the transport for
 * close on disconnect or device removal.
 */
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
			    struct rdma_cm_event *event)
{
	struct svc_xprt *xprt = cma_id->context;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED:
		/* Accept complete: allow deferred RPC I/O to proceed
		 * (see dto_tasklet_func) */
		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
			"cm_id=%p\n", xprt, cma_id);
		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
		svc_xprt_enqueue(xprt);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
			xprt, cma_id);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
		}
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
			"event=%d\n", cma_id, xprt, event->event);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
		}
		break;
	default:
		dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
			"event=%d\n", cma_id, event->event);
		break;
	}
	return 0;
}
642
643/*
644 * Create a listening RDMA service endpoint.
645 */
646static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
647 struct sockaddr *sa, int salen,
648 int flags)
649{
650 struct rdma_cm_id *listen_id;
651 struct svcxprt_rdma *cma_xprt;
652 struct svc_xprt *xprt;
653 int ret;
654
655 dprintk("svcrdma: Creating RDMA socket\n");
656
657 cma_xprt = rdma_create_xprt(serv, 1);
658 if (!cma_xprt)
659 return ERR_PTR(ENOMEM);
660 xprt = &cma_xprt->sc_xprt;
661
662 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
663 if (IS_ERR(listen_id)) {
664 rdma_destroy_xprt(cma_xprt);
665 dprintk("svcrdma: rdma_create_id failed = %ld\n",
666 PTR_ERR(listen_id));
667 return (void *)listen_id;
668 }
669 ret = rdma_bind_addr(listen_id, sa);
670 if (ret) {
671 rdma_destroy_xprt(cma_xprt);
672 rdma_destroy_id(listen_id);
673 dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
674 return ERR_PTR(ret);
675 }
676 cma_xprt->sc_cm_id = listen_id;
677
678 ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
679 if (ret) {
680 rdma_destroy_id(listen_id);
681 rdma_destroy_xprt(cma_xprt);
682 dprintk("svcrdma: rdma_listen failed = %d\n", ret);
683 }
684
685 /*
686 * We need to use the address from the cm_id in case the
687 * caller specified 0 for the port number.
688 */
689 sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr;
690 svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
691
692 return &cma_xprt->sc_xprt;
693}
694
695/*
696 * This is the xpo_recvfrom function for listening endpoints. Its
697 * purpose is to accept incoming connections. The CMA callback handler
698 * has already created a new transport and attached it to the new CMA
699 * ID.
700 *
701 * There is a queue of pending connections hung on the listening
702 * transport. This queue contains the new svc_xprt structure. This
703 * function takes svc_xprt structures off the accept_q and completes
704 * the connection.
705 */
706static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
707{
708 struct svcxprt_rdma *listen_rdma;
709 struct svcxprt_rdma *newxprt = NULL;
710 struct rdma_conn_param conn_param;
711 struct ib_qp_init_attr qp_attr;
712 struct ib_device_attr devattr;
713 struct sockaddr *sa;
714 int ret;
715 int i;
716
717 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
718 clear_bit(XPT_CONN, &xprt->xpt_flags);
719 /* Get the next entry off the accept list */
720 spin_lock_bh(&listen_rdma->sc_lock);
721 if (!list_empty(&listen_rdma->sc_accept_q)) {
722 newxprt = list_entry(listen_rdma->sc_accept_q.next,
723 struct svcxprt_rdma, sc_accept_q);
724 list_del_init(&newxprt->sc_accept_q);
725 }
726 if (!list_empty(&listen_rdma->sc_accept_q))
727 set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
728 spin_unlock_bh(&listen_rdma->sc_lock);
729 if (!newxprt)
730 return NULL;
731
732 dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
733 newxprt, newxprt->sc_cm_id);
734
735 ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
736 if (ret) {
737 dprintk("svcrdma: could not query device attributes on "
738 "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
739 goto errout;
740 }
741
742 /* Qualify the transport resource defaults with the
743 * capabilities of this particular device */
744 newxprt->sc_max_sge = min((size_t)devattr.max_sge,
745 (size_t)RPCSVC_MAXPAGES);
746 newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
747 (size_t)svcrdma_max_requests);
748 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
749
750 newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom,
751 (size_t)svcrdma_ord);
752
753 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
754 if (IS_ERR(newxprt->sc_pd)) {
755 dprintk("svcrdma: error creating PD for connect request\n");
756 goto errout;
757 }
758 newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
759 sq_comp_handler,
760 cq_event_handler,
761 newxprt,
762 newxprt->sc_sq_depth,
763 0);
764 if (IS_ERR(newxprt->sc_sq_cq)) {
765 dprintk("svcrdma: error creating SQ CQ for connect request\n");
766 goto errout;
767 }
768 newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
769 rq_comp_handler,
770 cq_event_handler,
771 newxprt,
772 newxprt->sc_max_requests,
773 0);
774 if (IS_ERR(newxprt->sc_rq_cq)) {
775 dprintk("svcrdma: error creating RQ CQ for connect request\n");
776 goto errout;
777 }
778
779 memset(&qp_attr, 0, sizeof qp_attr);
780 qp_attr.event_handler = qp_event_handler;
781 qp_attr.qp_context = &newxprt->sc_xprt;
782 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
783 qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
784 qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
785 qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
786 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
787 qp_attr.qp_type = IB_QPT_RC;
788 qp_attr.send_cq = newxprt->sc_sq_cq;
789 qp_attr.recv_cq = newxprt->sc_rq_cq;
790 dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n"
791 " cm_id->device=%p, sc_pd->device=%p\n"
792 " cap.max_send_wr = %d\n"
793 " cap.max_recv_wr = %d\n"
794 " cap.max_send_sge = %d\n"
795 " cap.max_recv_sge = %d\n",
796 newxprt->sc_cm_id, newxprt->sc_pd,
797 newxprt->sc_cm_id->device, newxprt->sc_pd->device,
798 qp_attr.cap.max_send_wr,
799 qp_attr.cap.max_recv_wr,
800 qp_attr.cap.max_send_sge,
801 qp_attr.cap.max_recv_sge);
802
803 ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
804 if (ret) {
805 /*
806 * XXX: This is a hack. We need a xx_request_qp interface
807 * that will adjust the qp_attr's with a best-effort
808 * number
809 */
810 qp_attr.cap.max_send_sge -= 2;
811 qp_attr.cap.max_recv_sge -= 2;
812 ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd,
813 &qp_attr);
814 if (ret) {
815 dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
816 goto errout;
817 }
818 newxprt->sc_max_sge = qp_attr.cap.max_send_sge;
819 newxprt->sc_max_sge = qp_attr.cap.max_recv_sge;
820 newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
821 newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
822 }
823 newxprt->sc_qp = newxprt->sc_cm_id->qp;
824
825 /* Register all of physical memory */
826 newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
827 IB_ACCESS_LOCAL_WRITE |
828 IB_ACCESS_REMOTE_WRITE);
829 if (IS_ERR(newxprt->sc_phys_mr)) {
830 dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
831 goto errout;
832 }
833
834 /* Post receive buffers */
835 for (i = 0; i < newxprt->sc_max_requests; i++) {
836 ret = svc_rdma_post_recv(newxprt);
837 if (ret) {
838 dprintk("svcrdma: failure posting receive buffers\n");
839 goto errout;
840 }
841 }
842
843 /* Swap out the handler */
844 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
845
846 /* Accept Connection */
847 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
848 memset(&conn_param, 0, sizeof conn_param);
849 conn_param.responder_resources = 0;
850 conn_param.initiator_depth = newxprt->sc_ord;
851 ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
852 if (ret) {
853 dprintk("svcrdma: failed to accept new connection, ret=%d\n",
854 ret);
855 goto errout;
856 }
857
858 dprintk("svcrdma: new connection %p accepted with the following "
859 "attributes:\n"
860 " local_ip : %d.%d.%d.%d\n"
861 " local_port : %d\n"
862 " remote_ip : %d.%d.%d.%d\n"
863 " remote_port : %d\n"
864 " max_sge : %d\n"
865 " sq_depth : %d\n"
866 " max_requests : %d\n"
867 " ord : %d\n",
868 newxprt,
869 NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id->
870 route.addr.src_addr)->sin_addr.s_addr),
871 ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
872 route.addr.src_addr)->sin_port),
873 NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id->
874 route.addr.dst_addr)->sin_addr.s_addr),
875 ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
876 route.addr.dst_addr)->sin_port),
877 newxprt->sc_max_sge,
878 newxprt->sc_sq_depth,
879 newxprt->sc_max_requests,
880 newxprt->sc_ord);
881
882 /* Set the local and remote addresses in the transport */
883 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
884 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
885 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
886 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
887
888 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
889 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
890 return &newxprt->sc_xprt;
891
892 errout:
893 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
894 rdma_destroy_id(newxprt->sc_cm_id);
895 rdma_destroy_xprt(newxprt);
896 return NULL;
897}
898
899/*
900 * Post an RQ WQE to the RQ when the rqst is being released. This
901 * effectively returns an RQ credit to the client. The rq_xprt_ctxt
902 * will be null if the request is deferred due to an RDMA_READ or the
903 * transport had no data ready (EAGAIN). Note that an RPC deferred in
904 * svc_process will still return the credit, this is because the data
905 * is copied and no longer consume a WQE/WC.
906 */
907static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
908{
909 int err;
910 struct svcxprt_rdma *rdma =
911 container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
912 if (rqstp->rq_xprt_ctxt) {
913 BUG_ON(rqstp->rq_xprt_ctxt != rdma);
914 err = svc_rdma_post_recv(rdma);
915 if (err)
916 dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
917 err);
918 }
919 rqstp->rq_xprt_ctxt = NULL;
920}
921
922/* Disable data ready events for this connection */
923static void svc_rdma_detach(struct svc_xprt *xprt)
924{
925 struct svcxprt_rdma *rdma =
926 container_of(xprt, struct svcxprt_rdma, sc_xprt);
927 unsigned long flags;
928
929 dprintk("svc: svc_rdma_detach(%p)\n", xprt);
930 /*
931 * Shutdown the connection. This will ensure we don't get any
932 * more events from the provider.
933 */
934 rdma_disconnect(rdma->sc_cm_id);
935 rdma_destroy_id(rdma->sc_cm_id);
936
937 /* We may already be on the DTO list */
938 spin_lock_irqsave(&dto_lock, flags);
939 if (!list_empty(&rdma->sc_dto_q))
940 list_del_init(&rdma->sc_dto_q);
941 spin_unlock_irqrestore(&dto_lock, flags);
942}
943
944static void svc_rdma_free(struct svc_xprt *xprt)
945{
946 struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
947 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
948 rdma_destroy_xprt(rdma);
949 kfree(rdma);
950}
951
952static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
953{
954 if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
955 ib_destroy_qp(xprt->sc_qp);
956
957 if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
958 ib_destroy_cq(xprt->sc_sq_cq);
959
960 if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
961 ib_destroy_cq(xprt->sc_rq_cq);
962
963 if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
964 ib_dereg_mr(xprt->sc_phys_mr);
965
966 if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
967 ib_dealloc_pd(xprt->sc_pd);
968
969 destroy_context_cache(xprt->sc_ctxt_head);
970}
971
972static int svc_rdma_has_wspace(struct svc_xprt *xprt)
973{
974 struct svcxprt_rdma *rdma =
975 container_of(xprt, struct svcxprt_rdma, sc_xprt);
976
977 /*
978 * If there are fewer SQ WR available than required to send a
979 * simple response, return false.
980 */
981 if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
982 return 0;
983
984 /*
985 * ...or there are already waiters on the SQ,
986 * return false.
987 */
988 if (waitqueue_active(&rdma->sc_send_wait))
989 return 0;
990
991 /* Otherwise return true. */
992 return 1;
993}
994
995int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
996{
997 struct ib_send_wr *bad_wr;
998 int ret;
999
1000 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1001 return 0;
1002
1003 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1004 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
1005 wr->opcode);
1006 /* If the SQ is full, wait until an SQ entry is available */
1007 while (1) {
1008 spin_lock_bh(&xprt->sc_lock);
1009 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
1010 spin_unlock_bh(&xprt->sc_lock);
1011 atomic_inc(&rdma_stat_sq_starve);
1012 /* See if we can reap some SQ WR */
1013 sq_cq_reap(xprt);
1014
1015 /* Wait until SQ WR available if SQ still full */
1016 wait_event(xprt->sc_send_wait,
1017 atomic_read(&xprt->sc_sq_count) <
1018 xprt->sc_sq_depth);
1019 continue;
1020 }
1021 /* Bumped used SQ WR count and post */
1022 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1023 if (!ret)
1024 atomic_inc(&xprt->sc_sq_count);
1025 else
1026 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1027 "sc_sq_count=%d, sc_sq_depth=%d\n",
1028 ret, atomic_read(&xprt->sc_sq_count),
1029 xprt->sc_sq_depth);
1030 spin_unlock_bh(&xprt->sc_lock);
1031 break;
1032 }
1033 return ret;
1034}
1035
1036int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1037 enum rpcrdma_errcode err)
1038{
1039 struct ib_send_wr err_wr;
1040 struct ib_sge sge;
1041 struct page *p;
1042 struct svc_rdma_op_ctxt *ctxt;
1043 u32 *va;
1044 int length;
1045 int ret;
1046
1047 p = svc_rdma_get_page();
1048 va = page_address(p);
1049
1050 /* XDR encode error */
1051 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1052
1053 /* Prepare SGE for local address */
1054 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1055 p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1056 sge.lkey = xprt->sc_phys_mr->lkey;
1057 sge.length = length;
1058
1059 ctxt = svc_rdma_get_context(xprt);
1060 ctxt->count = 1;
1061 ctxt->pages[0] = p;
1062
1063 /* Prepare SEND WR */
1064 memset(&err_wr, 0, sizeof err_wr);
1065 ctxt->wr_op = IB_WR_SEND;
1066 err_wr.wr_id = (unsigned long)ctxt;
1067 err_wr.sg_list = &sge;
1068 err_wr.num_sge = 1;
1069 err_wr.opcode = IB_WR_SEND;
1070 err_wr.send_flags = IB_SEND_SIGNALED;
1071
1072 /* Post It */
1073 ret = svc_rdma_send(xprt, &err_wr);
1074 if (ret) {
1075 dprintk("svcrdma: Error posting send = %d\n", ret);
1076 svc_rdma_put_context(ctxt, 1);
1077 }
1078
1079 return ret;
1080}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index dc55cc974c90..02c522c17de5 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -89,7 +89,7 @@ static struct ctl_table_header *sunrpc_table_header;
89 89
90static ctl_table xr_tunables_table[] = { 90static ctl_table xr_tunables_table[] = {
91 { 91 {
92 .ctl_name = CTL_SLOTTABLE_RDMA, 92 .ctl_name = CTL_UNNUMBERED,
93 .procname = "rdma_slot_table_entries", 93 .procname = "rdma_slot_table_entries",
94 .data = &xprt_rdma_slot_table_entries, 94 .data = &xprt_rdma_slot_table_entries,
95 .maxlen = sizeof(unsigned int), 95 .maxlen = sizeof(unsigned int),
@@ -100,7 +100,7 @@ static ctl_table xr_tunables_table[] = {
100 .extra2 = &max_slot_table_size 100 .extra2 = &max_slot_table_size
101 }, 101 },
102 { 102 {
103 .ctl_name = CTL_RDMA_MAXINLINEREAD, 103 .ctl_name = CTL_UNNUMBERED,
104 .procname = "rdma_max_inline_read", 104 .procname = "rdma_max_inline_read",
105 .data = &xprt_rdma_max_inline_read, 105 .data = &xprt_rdma_max_inline_read,
106 .maxlen = sizeof(unsigned int), 106 .maxlen = sizeof(unsigned int),
@@ -109,7 +109,7 @@ static ctl_table xr_tunables_table[] = {
109 .strategy = &sysctl_intvec, 109 .strategy = &sysctl_intvec,
110 }, 110 },
111 { 111 {
112 .ctl_name = CTL_RDMA_MAXINLINEWRITE, 112 .ctl_name = CTL_UNNUMBERED,
113 .procname = "rdma_max_inline_write", 113 .procname = "rdma_max_inline_write",
114 .data = &xprt_rdma_max_inline_write, 114 .data = &xprt_rdma_max_inline_write,
115 .maxlen = sizeof(unsigned int), 115 .maxlen = sizeof(unsigned int),
@@ -118,7 +118,7 @@ static ctl_table xr_tunables_table[] = {
118 .strategy = &sysctl_intvec, 118 .strategy = &sysctl_intvec,
119 }, 119 },
120 { 120 {
121 .ctl_name = CTL_RDMA_WRITEPADDING, 121 .ctl_name = CTL_UNNUMBERED,
122 .procname = "rdma_inline_write_padding", 122 .procname = "rdma_inline_write_padding",
123 .data = &xprt_rdma_inline_write_padding, 123 .data = &xprt_rdma_inline_write_padding,
124 .maxlen = sizeof(unsigned int), 124 .maxlen = sizeof(unsigned int),
@@ -129,7 +129,7 @@ static ctl_table xr_tunables_table[] = {
129 .extra2 = &max_padding, 129 .extra2 = &max_padding,
130 }, 130 },
131 { 131 {
132 .ctl_name = CTL_RDMA_MEMREG, 132 .ctl_name = CTL_UNNUMBERED,
133 .procname = "rdma_memreg_strategy", 133 .procname = "rdma_memreg_strategy",
134 .data = &xprt_rdma_memreg_strategy, 134 .data = &xprt_rdma_memreg_strategy,
135 .maxlen = sizeof(unsigned int), 135 .maxlen = sizeof(unsigned int),
@@ -212,12 +212,16 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
212static void 212static void
213xprt_rdma_free_addresses(struct rpc_xprt *xprt) 213xprt_rdma_free_addresses(struct rpc_xprt *xprt)
214{ 214{
215 kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); 215 unsigned int i;
216 kfree(xprt->address_strings[RPC_DISPLAY_PORT]); 216
217 kfree(xprt->address_strings[RPC_DISPLAY_ALL]); 217 for (i = 0; i < RPC_DISPLAY_MAX; i++)
218 kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); 218 switch (i) {
219 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); 219 case RPC_DISPLAY_PROTO:
220 kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); 220 case RPC_DISPLAY_NETID:
221 continue;
222 default:
223 kfree(xprt->address_strings[i]);
224 }
221} 225}
222 226
223static void 227static void
@@ -289,6 +293,11 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
289 module_put(THIS_MODULE); 293 module_put(THIS_MODULE);
290} 294}
291 295
296static const struct rpc_timeout xprt_rdma_default_timeout = {
297 .to_initval = 60 * HZ,
298 .to_maxval = 60 * HZ,
299};
300
292/** 301/**
293 * xprt_setup_rdma - Set up transport to use RDMA 302 * xprt_setup_rdma - Set up transport to use RDMA
294 * 303 *
@@ -320,14 +329,14 @@ xprt_setup_rdma(struct xprt_create *args)
320 xprt->slot = kcalloc(xprt->max_reqs, 329 xprt->slot = kcalloc(xprt->max_reqs,
321 sizeof(struct rpc_rqst), GFP_KERNEL); 330 sizeof(struct rpc_rqst), GFP_KERNEL);
322 if (xprt->slot == NULL) { 331 if (xprt->slot == NULL) {
323 kfree(xprt);
324 dprintk("RPC: %s: couldn't allocate %d slots\n", 332 dprintk("RPC: %s: couldn't allocate %d slots\n",
325 __func__, xprt->max_reqs); 333 __func__, xprt->max_reqs);
334 kfree(xprt);
326 return ERR_PTR(-ENOMEM); 335 return ERR_PTR(-ENOMEM);
327 } 336 }
328 337
329 /* 60 second timeout, no retries */ 338 /* 60 second timeout, no retries */
330 xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); 339 xprt->timeout = &xprt_rdma_default_timeout;
331 xprt->bind_timeout = (60U * HZ); 340 xprt->bind_timeout = (60U * HZ);
332 xprt->connect_timeout = (60U * HZ); 341 xprt->connect_timeout = (60U * HZ);
333 xprt->reestablish_timeout = (5U * HZ); 342 xprt->reestablish_timeout = (5U * HZ);
@@ -449,7 +458,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
449 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 458 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
450 459
451 dprintk("RPC: %s: closing\n", __func__); 460 dprintk("RPC: %s: closing\n", __func__);
452 xprt_disconnect(xprt); 461 xprt_disconnect_done(xprt);
453 (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); 462 (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
454} 463}
455 464
@@ -682,7 +691,7 @@ xprt_rdma_send_request(struct rpc_task *task)
682 } 691 }
683 692
684 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { 693 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
685 xprt_disconnect(xprt); 694 xprt_disconnect_done(xprt);
686 return -ENOTCONN; /* implies disconnect */ 695 return -ENOTCONN; /* implies disconnect */
687 } 696 }
688 697
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 44b0fb942e8d..ffbf22a1d2ca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -522,7 +522,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
522 struct rpcrdma_create_data_internal *cdata) 522 struct rpcrdma_create_data_internal *cdata)
523{ 523{
524 struct ib_device_attr devattr; 524 struct ib_device_attr devattr;
525 int rc; 525 int rc, err;
526 526
527 rc = ib_query_device(ia->ri_id->device, &devattr); 527 rc = ib_query_device(ia->ri_id->device, &devattr);
528 if (rc) { 528 if (rc) {
@@ -648,8 +648,10 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
648 return 0; 648 return 0;
649 649
650out2: 650out2:
651 if (ib_destroy_cq(ep->rep_cq)) 651 err = ib_destroy_cq(ep->rep_cq);
652 ; 652 if (err)
653 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
654 __func__, err);
653out1: 655out1:
654 return rc; 656 return rc;
655} 657}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 02298f529dad..30e7ac243a90 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -280,7 +280,9 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
280 return (struct sockaddr_in6 *) &xprt->addr; 280 return (struct sockaddr_in6 *) &xprt->addr;
281} 281}
282 282
283static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) 283static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
284 const char *protocol,
285 const char *netid)
284{ 286{
285 struct sockaddr_in *addr = xs_addr_in(xprt); 287 struct sockaddr_in *addr = xs_addr_in(xprt);
286 char *buf; 288 char *buf;
@@ -299,21 +301,14 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
299 } 301 }
300 xprt->address_strings[RPC_DISPLAY_PORT] = buf; 302 xprt->address_strings[RPC_DISPLAY_PORT] = buf;
301 303
302 buf = kzalloc(8, GFP_KERNEL); 304 xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
303 if (buf) {
304 if (xprt->prot == IPPROTO_UDP)
305 snprintf(buf, 8, "udp");
306 else
307 snprintf(buf, 8, "tcp");
308 }
309 xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
310 305
311 buf = kzalloc(48, GFP_KERNEL); 306 buf = kzalloc(48, GFP_KERNEL);
312 if (buf) { 307 if (buf) {
313 snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", 308 snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
314 NIPQUAD(addr->sin_addr.s_addr), 309 NIPQUAD(addr->sin_addr.s_addr),
315 ntohs(addr->sin_port), 310 ntohs(addr->sin_port),
316 xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); 311 protocol);
317 } 312 }
318 xprt->address_strings[RPC_DISPLAY_ALL] = buf; 313 xprt->address_strings[RPC_DISPLAY_ALL] = buf;
319 314
@@ -340,12 +335,12 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
340 } 335 }
341 xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; 336 xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
342 337
343 xprt->address_strings[RPC_DISPLAY_NETID] = 338 xprt->address_strings[RPC_DISPLAY_NETID] = netid;
344 kstrdup(xprt->prot == IPPROTO_UDP ?
345 RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
346} 339}
347 340
348static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) 341static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
342 const char *protocol,
343 const char *netid)
349{ 344{
350 struct sockaddr_in6 *addr = xs_addr_in6(xprt); 345 struct sockaddr_in6 *addr = xs_addr_in6(xprt);
351 char *buf; 346 char *buf;
@@ -364,21 +359,14 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
364 } 359 }
365 xprt->address_strings[RPC_DISPLAY_PORT] = buf; 360 xprt->address_strings[RPC_DISPLAY_PORT] = buf;
366 361
367 buf = kzalloc(8, GFP_KERNEL); 362 xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
368 if (buf) {
369 if (xprt->prot == IPPROTO_UDP)
370 snprintf(buf, 8, "udp");
371 else
372 snprintf(buf, 8, "tcp");
373 }
374 xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
375 363
376 buf = kzalloc(64, GFP_KERNEL); 364 buf = kzalloc(64, GFP_KERNEL);
377 if (buf) { 365 if (buf) {
378 snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", 366 snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
379 NIP6(addr->sin6_addr), 367 NIP6(addr->sin6_addr),
380 ntohs(addr->sin6_port), 368 ntohs(addr->sin6_port),
381 xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); 369 protocol);
382 } 370 }
383 xprt->address_strings[RPC_DISPLAY_ALL] = buf; 371 xprt->address_strings[RPC_DISPLAY_ALL] = buf;
384 372
@@ -405,17 +393,21 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
405 } 393 }
406 xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; 394 xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
407 395
408 xprt->address_strings[RPC_DISPLAY_NETID] = 396 xprt->address_strings[RPC_DISPLAY_NETID] = netid;
409 kstrdup(xprt->prot == IPPROTO_UDP ?
410 RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
411} 397}
412 398
413static void xs_free_peer_addresses(struct rpc_xprt *xprt) 399static void xs_free_peer_addresses(struct rpc_xprt *xprt)
414{ 400{
415 int i; 401 unsigned int i;
416 402
417 for (i = 0; i < RPC_DISPLAY_MAX; i++) 403 for (i = 0; i < RPC_DISPLAY_MAX; i++)
418 kfree(xprt->address_strings[i]); 404 switch (i) {
405 case RPC_DISPLAY_PROTO:
406 case RPC_DISPLAY_NETID:
407 continue;
408 default:
409 kfree(xprt->address_strings[i]);
410 }
419} 411}
420 412
421#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) 413#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -614,6 +606,22 @@ static int xs_udp_send_request(struct rpc_task *task)
614 return status; 606 return status;
615} 607}
616 608
609/**
610 * xs_tcp_shutdown - gracefully shut down a TCP socket
611 * @xprt: transport
612 *
613 * Initiates a graceful shutdown of the TCP socket by calling the
614 * equivalent of shutdown(SHUT_WR);
615 */
616static void xs_tcp_shutdown(struct rpc_xprt *xprt)
617{
618 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
619 struct socket *sock = transport->sock;
620
621 if (sock != NULL)
622 kernel_sock_shutdown(sock, SHUT_WR);
623}
624
617static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) 625static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
618{ 626{
619 u32 reclen = buf->len - sizeof(rpc_fraghdr); 627 u32 reclen = buf->len - sizeof(rpc_fraghdr);
@@ -691,7 +699,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
691 default: 699 default:
692 dprintk("RPC: sendmsg returned unrecognized error %d\n", 700 dprintk("RPC: sendmsg returned unrecognized error %d\n",
693 -status); 701 -status);
694 xprt_disconnect(xprt); 702 xs_tcp_shutdown(xprt);
695 break; 703 break;
696 } 704 }
697 705
@@ -759,7 +767,9 @@ static void xs_close(struct rpc_xprt *xprt)
759clear_close_wait: 767clear_close_wait:
760 smp_mb__before_clear_bit(); 768 smp_mb__before_clear_bit();
761 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 769 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
770 clear_bit(XPRT_CLOSING, &xprt->state);
762 smp_mb__after_clear_bit(); 771 smp_mb__after_clear_bit();
772 xprt_disconnect_done(xprt);
763} 773}
764 774
765/** 775/**
@@ -775,7 +785,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
775 785
776 cancel_rearming_delayed_work(&transport->connect_worker); 786 cancel_rearming_delayed_work(&transport->connect_worker);
777 787
778 xprt_disconnect(xprt);
779 xs_close(xprt); 788 xs_close(xprt);
780 xs_free_peer_addresses(xprt); 789 xs_free_peer_addresses(xprt);
781 kfree(xprt->slot); 790 kfree(xprt->slot);
@@ -838,8 +847,12 @@ static void xs_udp_data_ready(struct sock *sk, int len)
838 copied = repsize; 847 copied = repsize;
839 848
840 /* Suck it into the iovec, verify checksum if not done by hw. */ 849 /* Suck it into the iovec, verify checksum if not done by hw. */
841 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) 850 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
851 UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS);
842 goto out_unlock; 852 goto out_unlock;
853 }
854
855 UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
843 856
844 /* Something worked... */ 857 /* Something worked... */
845 dst_confirm(skb->dst); 858 dst_confirm(skb->dst);
@@ -882,7 +895,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
882 /* Sanity check of the record length */ 895 /* Sanity check of the record length */
883 if (unlikely(transport->tcp_reclen < 4)) { 896 if (unlikely(transport->tcp_reclen < 4)) {
884 dprintk("RPC: invalid TCP record fragment length\n"); 897 dprintk("RPC: invalid TCP record fragment length\n");
885 xprt_disconnect(xprt); 898 xprt_force_disconnect(xprt);
886 return; 899 return;
887 } 900 }
888 dprintk("RPC: reading TCP record fragment of length %d\n", 901 dprintk("RPC: reading TCP record fragment of length %d\n",
@@ -1109,21 +1122,44 @@ static void xs_tcp_state_change(struct sock *sk)
1109 transport->tcp_flags = 1122 transport->tcp_flags =
1110 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; 1123 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1111 1124
1112 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1113 xprt_wake_pending_tasks(xprt, 0); 1125 xprt_wake_pending_tasks(xprt, 0);
1114 } 1126 }
1115 spin_unlock_bh(&xprt->transport_lock); 1127 spin_unlock_bh(&xprt->transport_lock);
1116 break; 1128 break;
1117 case TCP_SYN_SENT: 1129 case TCP_FIN_WAIT1:
1118 case TCP_SYN_RECV: 1130 /* The client initiated a shutdown of the socket */
1131 xprt->reestablish_timeout = 0;
1132 set_bit(XPRT_CLOSING, &xprt->state);
1133 smp_mb__before_clear_bit();
1134 clear_bit(XPRT_CONNECTED, &xprt->state);
1135 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1136 smp_mb__after_clear_bit();
1119 break; 1137 break;
1120 case TCP_CLOSE_WAIT: 1138 case TCP_CLOSE_WAIT:
1121 /* Try to schedule an autoclose RPC calls */ 1139 /* The server initiated a shutdown of the socket */
1122 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 1140 set_bit(XPRT_CLOSING, &xprt->state);
1123 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 1141 xprt_force_disconnect(xprt);
1124 queue_work(rpciod_workqueue, &xprt->task_cleanup); 1142 case TCP_SYN_SENT:
1125 default: 1143 case TCP_CLOSING:
1126 xprt_disconnect(xprt); 1144 /*
1145 * If the server closed down the connection, make sure that
1146 * we back off before reconnecting
1147 */
1148 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
1149 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1150 break;
1151 case TCP_LAST_ACK:
1152 smp_mb__before_clear_bit();
1153 clear_bit(XPRT_CONNECTED, &xprt->state);
1154 smp_mb__after_clear_bit();
1155 break;
1156 case TCP_CLOSE:
1157 smp_mb__before_clear_bit();
1158 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1159 clear_bit(XPRT_CLOSING, &xprt->state);
1160 smp_mb__after_clear_bit();
1161 /* Mark transport as closed and wake up all pending tasks */
1162 xprt_disconnect_done(xprt);
1127 } 1163 }
1128 out: 1164 out:
1129 read_unlock(&sk->sk_callback_lock); 1165 read_unlock(&sk->sk_callback_lock);
@@ -1275,34 +1311,53 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1275 } 1311 }
1276} 1312}
1277 1313
1314static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
1315{
1316 unsigned short port = transport->port;
1317
1318 if (port == 0 && transport->xprt.resvport)
1319 port = xs_get_random_port();
1320 return port;
1321}
1322
1323static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
1324{
1325 if (transport->port != 0)
1326 transport->port = 0;
1327 if (!transport->xprt.resvport)
1328 return 0;
1329 if (port <= xprt_min_resvport || port > xprt_max_resvport)
1330 return xprt_max_resvport;
1331 return --port;
1332}
1333
1278static int xs_bind4(struct sock_xprt *transport, struct socket *sock) 1334static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1279{ 1335{
1280 struct sockaddr_in myaddr = { 1336 struct sockaddr_in myaddr = {
1281 .sin_family = AF_INET, 1337 .sin_family = AF_INET,
1282 }; 1338 };
1283 struct sockaddr_in *sa; 1339 struct sockaddr_in *sa;
1284 int err; 1340 int err, nloop = 0;
1285 unsigned short port = transport->port; 1341 unsigned short port = xs_get_srcport(transport, sock);
1342 unsigned short last;
1286 1343
1287 if (!transport->xprt.resvport)
1288 port = 0;
1289 sa = (struct sockaddr_in *)&transport->addr; 1344 sa = (struct sockaddr_in *)&transport->addr;
1290 myaddr.sin_addr = sa->sin_addr; 1345 myaddr.sin_addr = sa->sin_addr;
1291 do { 1346 do {
1292 myaddr.sin_port = htons(port); 1347 myaddr.sin_port = htons(port);
1293 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1348 err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1294 sizeof(myaddr)); 1349 sizeof(myaddr));
1295 if (!transport->xprt.resvport) 1350 if (port == 0)
1296 break; 1351 break;
1297 if (err == 0) { 1352 if (err == 0) {
1298 transport->port = port; 1353 transport->port = port;
1299 break; 1354 break;
1300 } 1355 }
1301 if (port <= xprt_min_resvport) 1356 last = port;
1302 port = xprt_max_resvport; 1357 port = xs_next_srcport(transport, sock, port);
1303 else 1358 if (port > last)
1304 port--; 1359 nloop++;
1305 } while (err == -EADDRINUSE && port != transport->port); 1360 } while (err == -EADDRINUSE && nloop != 2);
1306 dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", 1361 dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n",
1307 __FUNCTION__, NIPQUAD(myaddr.sin_addr), 1362 __FUNCTION__, NIPQUAD(myaddr.sin_addr),
1308 port, err ? "failed" : "ok", err); 1363 port, err ? "failed" : "ok", err);
@@ -1315,28 +1370,27 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
1315 .sin6_family = AF_INET6, 1370 .sin6_family = AF_INET6,
1316 }; 1371 };
1317 struct sockaddr_in6 *sa; 1372 struct sockaddr_in6 *sa;
1318 int err; 1373 int err, nloop = 0;
1319 unsigned short port = transport->port; 1374 unsigned short port = xs_get_srcport(transport, sock);
1375 unsigned short last;
1320 1376
1321 if (!transport->xprt.resvport)
1322 port = 0;
1323 sa = (struct sockaddr_in6 *)&transport->addr; 1377 sa = (struct sockaddr_in6 *)&transport->addr;
1324 myaddr.sin6_addr = sa->sin6_addr; 1378 myaddr.sin6_addr = sa->sin6_addr;
1325 do { 1379 do {
1326 myaddr.sin6_port = htons(port); 1380 myaddr.sin6_port = htons(port);
1327 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1381 err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1328 sizeof(myaddr)); 1382 sizeof(myaddr));
1329 if (!transport->xprt.resvport) 1383 if (port == 0)
1330 break; 1384 break;
1331 if (err == 0) { 1385 if (err == 0) {
1332 transport->port = port; 1386 transport->port = port;
1333 break; 1387 break;
1334 } 1388 }
1335 if (port <= xprt_min_resvport) 1389 last = port;
1336 port = xprt_max_resvport; 1390 port = xs_next_srcport(transport, sock, port);
1337 else 1391 if (port > last)
1338 port--; 1392 nloop++;
1339 } while (err == -EADDRINUSE && port != transport->port); 1393 } while (err == -EADDRINUSE && nloop != 2);
1340 dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", 1394 dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
1341 NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); 1395 NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
1342 return err; 1396 return err;
@@ -1598,8 +1652,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1598 break; 1652 break;
1599 default: 1653 default:
1600 /* get rid of existing socket, and retry */ 1654 /* get rid of existing socket, and retry */
1601 xs_close(xprt); 1655 xs_tcp_shutdown(xprt);
1602 break;
1603 } 1656 }
1604 } 1657 }
1605out: 1658out:
@@ -1658,8 +1711,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
1658 break; 1711 break;
1659 default: 1712 default:
1660 /* get rid of existing socket, and retry */ 1713 /* get rid of existing socket, and retry */
1661 xs_close(xprt); 1714 xs_tcp_shutdown(xprt);
1662 break;
1663 } 1715 }
1664 } 1716 }
1665out: 1717out:
@@ -1706,6 +1758,19 @@ static void xs_connect(struct rpc_task *task)
1706 } 1758 }
1707} 1759}
1708 1760
1761static void xs_tcp_connect(struct rpc_task *task)
1762{
1763 struct rpc_xprt *xprt = task->tk_xprt;
1764
1765 /* Initiate graceful shutdown of the socket if not already done */
1766 if (test_bit(XPRT_CONNECTED, &xprt->state))
1767 xs_tcp_shutdown(xprt);
1768 /* Exit if we need to wait for socket shutdown to complete */
1769 if (test_bit(XPRT_CLOSING, &xprt->state))
1770 return;
1771 xs_connect(task);
1772}
1773
1709/** 1774/**
1710 * xs_udp_print_stats - display UDP socket-specifc stats 1775 * xs_udp_print_stats - display UDP socket-specifc stats
1711 * @xprt: rpc_xprt struct containing statistics 1776 * @xprt: rpc_xprt struct containing statistics
@@ -1776,12 +1841,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1776 .release_xprt = xs_tcp_release_xprt, 1841 .release_xprt = xs_tcp_release_xprt,
1777 .rpcbind = rpcb_getport_async, 1842 .rpcbind = rpcb_getport_async,
1778 .set_port = xs_set_port, 1843 .set_port = xs_set_port,
1779 .connect = xs_connect, 1844 .connect = xs_tcp_connect,
1780 .buf_alloc = rpc_malloc, 1845 .buf_alloc = rpc_malloc,
1781 .buf_free = rpc_free, 1846 .buf_free = rpc_free,
1782 .send_request = xs_tcp_send_request, 1847 .send_request = xs_tcp_send_request,
1783 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1848 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1784 .close = xs_close, 1849 .close = xs_tcp_shutdown,
1785 .destroy = xs_destroy, 1850 .destroy = xs_destroy,
1786 .print_stats = xs_tcp_print_stats, 1851 .print_stats = xs_tcp_print_stats,
1787}; 1852};
@@ -1818,17 +1883,23 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
1818 xprt->addrlen = args->addrlen; 1883 xprt->addrlen = args->addrlen;
1819 if (args->srcaddr) 1884 if (args->srcaddr)
1820 memcpy(&new->addr, args->srcaddr, args->addrlen); 1885 memcpy(&new->addr, args->srcaddr, args->addrlen);
1821 new->port = xs_get_random_port();
1822 1886
1823 return xprt; 1887 return xprt;
1824} 1888}
1825 1889
1890static const struct rpc_timeout xs_udp_default_timeout = {
1891 .to_initval = 5 * HZ,
1892 .to_maxval = 30 * HZ,
1893 .to_increment = 5 * HZ,
1894 .to_retries = 5,
1895};
1896
1826/** 1897/**
1827 * xs_setup_udp - Set up transport to use a UDP socket 1898 * xs_setup_udp - Set up transport to use a UDP socket
1828 * @args: rpc transport creation arguments 1899 * @args: rpc transport creation arguments
1829 * 1900 *
1830 */ 1901 */
1831struct rpc_xprt *xs_setup_udp(struct xprt_create *args) 1902static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
1832{ 1903{
1833 struct sockaddr *addr = args->dstaddr; 1904 struct sockaddr *addr = args->dstaddr;
1834 struct rpc_xprt *xprt; 1905 struct rpc_xprt *xprt;
@@ -1851,10 +1922,7 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
1851 1922
1852 xprt->ops = &xs_udp_ops; 1923 xprt->ops = &xs_udp_ops;
1853 1924
1854 if (args->timeout) 1925 xprt->timeout = &xs_udp_default_timeout;
1855 xprt->timeout = *args->timeout;
1856 else
1857 xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
1858 1926
1859 switch (addr->sa_family) { 1927 switch (addr->sa_family) {
1860 case AF_INET: 1928 case AF_INET:
@@ -1863,7 +1931,7 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
1863 1931
1864 INIT_DELAYED_WORK(&transport->connect_worker, 1932 INIT_DELAYED_WORK(&transport->connect_worker,
1865 xs_udp_connect_worker4); 1933 xs_udp_connect_worker4);
1866 xs_format_ipv4_peer_addresses(xprt); 1934 xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
1867 break; 1935 break;
1868 case AF_INET6: 1936 case AF_INET6:
1869 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 1937 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
@@ -1871,7 +1939,7 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
1871 1939
1872 INIT_DELAYED_WORK(&transport->connect_worker, 1940 INIT_DELAYED_WORK(&transport->connect_worker,
1873 xs_udp_connect_worker6); 1941 xs_udp_connect_worker6);
1874 xs_format_ipv6_peer_addresses(xprt); 1942 xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
1875 break; 1943 break;
1876 default: 1944 default:
1877 kfree(xprt); 1945 kfree(xprt);
@@ -1889,12 +1957,18 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
1889 return ERR_PTR(-EINVAL); 1957 return ERR_PTR(-EINVAL);
1890} 1958}
1891 1959
1960static const struct rpc_timeout xs_tcp_default_timeout = {
1961 .to_initval = 60 * HZ,
1962 .to_maxval = 60 * HZ,
1963 .to_retries = 2,
1964};
1965
1892/** 1966/**
1893 * xs_setup_tcp - Set up transport to use a TCP socket 1967 * xs_setup_tcp - Set up transport to use a TCP socket
1894 * @args: rpc transport creation arguments 1968 * @args: rpc transport creation arguments
1895 * 1969 *
1896 */ 1970 */
1897struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) 1971static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
1898{ 1972{
1899 struct sockaddr *addr = args->dstaddr; 1973 struct sockaddr *addr = args->dstaddr;
1900 struct rpc_xprt *xprt; 1974 struct rpc_xprt *xprt;
@@ -1915,11 +1989,7 @@ struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
1915 xprt->idle_timeout = XS_IDLE_DISC_TO; 1989 xprt->idle_timeout = XS_IDLE_DISC_TO;
1916 1990
1917 xprt->ops = &xs_tcp_ops; 1991 xprt->ops = &xs_tcp_ops;
1918 1992 xprt->timeout = &xs_tcp_default_timeout;
1919 if (args->timeout)
1920 xprt->timeout = *args->timeout;
1921 else
1922 xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
1923 1993
1924 switch (addr->sa_family) { 1994 switch (addr->sa_family) {
1925 case AF_INET: 1995 case AF_INET:
@@ -1927,14 +1997,14 @@ struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
1927 xprt_set_bound(xprt); 1997 xprt_set_bound(xprt);
1928 1998
1929 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); 1999 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
1930 xs_format_ipv4_peer_addresses(xprt); 2000 xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
1931 break; 2001 break;
1932 case AF_INET6: 2002 case AF_INET6:
1933 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2003 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
1934 xprt_set_bound(xprt); 2004 xprt_set_bound(xprt);
1935 2005
1936 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); 2006 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
1937 xs_format_ipv6_peer_addresses(xprt); 2007 xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
1938 break; 2008 break;
1939 default: 2009 default:
1940 kfree(xprt); 2010 kfree(xprt);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index cd4eafbab1b8..665e856675a4 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sysctl.h> 16#include <linux/sysctl.h>
17#include <linux/nsproxy.h>
17 18
18#include <net/sock.h> 19#include <net/sock.h>
19 20
@@ -29,28 +30,58 @@
29#include <linux/if_tr.h> 30#include <linux/if_tr.h>
30#endif 31#endif
31 32
32struct ctl_table net_table[] = { 33static struct list_head *
33 { 34net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
34 .ctl_name = NET_CORE, 35{
35 .procname = "core", 36 return &namespaces->net_ns->sysctl_table_headers;
36 .mode = 0555, 37}
37 .child = core_table, 38
38 }, 39static struct ctl_table_root net_sysctl_root = {
39#ifdef CONFIG_INET 40 .lookup = net_ctl_header_lookup,
40 { 41};
41 .ctl_name = NET_IPV4, 42
42 .procname = "ipv4", 43static int sysctl_net_init(struct net *net)
43 .mode = 0555, 44{
44 .child = ipv4_table 45 INIT_LIST_HEAD(&net->sysctl_table_headers);
45 }, 46 return 0;
46#endif 47}
47#ifdef CONFIG_TR 48
48 { 49static void sysctl_net_exit(struct net *net)
49 .ctl_name = NET_TR, 50{
50 .procname = "token-ring", 51 WARN_ON(!list_empty(&net->sysctl_table_headers));
51 .mode = 0555, 52 return;
52 .child = tr_table, 53}
53 }, 54
54#endif 55static struct pernet_operations sysctl_pernet_ops = {
55 { 0 }, 56 .init = sysctl_net_init,
57 .exit = sysctl_net_exit,
56}; 58};
59
60static __init int sysctl_init(void)
61{
62 int ret;
63 ret = register_pernet_subsys(&sysctl_pernet_ops);
64 if (ret)
65 goto out;
66 register_sysctl_root(&net_sysctl_root);
67out:
68 return ret;
69}
70subsys_initcall(sysctl_init);
71
72struct ctl_table_header *register_net_sysctl_table(struct net *net,
73 const struct ctl_path *path, struct ctl_table *table)
74{
75 struct nsproxy namespaces;
76 namespaces = *current->nsproxy;
77 namespaces.net_ns = net;
78 return __register_sysctl_paths(&net_sysctl_root,
79 &namespaces, path, table);
80}
81EXPORT_SYMBOL_GPL(register_net_sysctl_table);
82
83void unregister_net_sysctl_table(struct ctl_table_header *header)
84{
85 return unregister_sysctl_table(header);
86}
87EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index e40ada964d6e..feabca580820 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -212,9 +212,7 @@ static inline void k_init_timer(struct timer_list *timer, Handler routine,
212 unsigned long argument) 212 unsigned long argument)
213{ 213{
214 dbg("initializing timer %p\n", timer); 214 dbg("initializing timer %p\n", timer);
215 init_timer(timer); 215 setup_timer(timer, routine, argument);
216 timer->function = routine;
217 timer->data = argument;
218} 216}
219 217
220/** 218/**
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 76088153524c..f508614ca59b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -340,7 +340,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
340 if (!p_ptr) 340 if (!p_ptr)
341 return -EINVAL; 341 return -EINVAL;
342 *isunreliable = port_unreliable(p_ptr); 342 *isunreliable = port_unreliable(p_ptr);
343 spin_unlock_bh(p_ptr->publ.lock); 343 tipc_port_unlock(p_ptr);
344 return TIPC_OK; 344 return TIPC_OK;
345} 345}
346 346
@@ -369,7 +369,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
369 if (!p_ptr) 369 if (!p_ptr)
370 return -EINVAL; 370 return -EINVAL;
371 *isunrejectable = port_unreturnable(p_ptr); 371 *isunrejectable = port_unreturnable(p_ptr);
372 spin_unlock_bh(p_ptr->publ.lock); 372 tipc_port_unlock(p_ptr);
373 return TIPC_OK; 373 return TIPC_OK;
374} 374}
375 375
@@ -843,7 +843,7 @@ static void port_dispatcher_sigh(void *dummy)
843 u32 peer_port = port_peerport(p_ptr); 843 u32 peer_port = port_peerport(p_ptr);
844 u32 peer_node = port_peernode(p_ptr); 844 u32 peer_node = port_peernode(p_ptr);
845 845
846 spin_unlock_bh(p_ptr->publ.lock); 846 tipc_port_unlock(p_ptr);
847 if (unlikely(!connected)) { 847 if (unlikely(!connected)) {
848 if (unlikely(published)) 848 if (unlikely(published))
849 goto reject; 849 goto reject;
@@ -867,7 +867,7 @@ static void port_dispatcher_sigh(void *dummy)
867 case TIPC_DIRECT_MSG:{ 867 case TIPC_DIRECT_MSG:{
868 tipc_msg_event cb = up_ptr->msg_cb; 868 tipc_msg_event cb = up_ptr->msg_cb;
869 869
870 spin_unlock_bh(p_ptr->publ.lock); 870 tipc_port_unlock(p_ptr);
871 if (unlikely(connected)) 871 if (unlikely(connected))
872 goto reject; 872 goto reject;
873 if (unlikely(!cb)) 873 if (unlikely(!cb))
@@ -882,7 +882,7 @@ static void port_dispatcher_sigh(void *dummy)
882 case TIPC_NAMED_MSG:{ 882 case TIPC_NAMED_MSG:{
883 tipc_named_msg_event cb = up_ptr->named_msg_cb; 883 tipc_named_msg_event cb = up_ptr->named_msg_cb;
884 884
885 spin_unlock_bh(p_ptr->publ.lock); 885 tipc_port_unlock(p_ptr);
886 if (unlikely(connected)) 886 if (unlikely(connected))
887 goto reject; 887 goto reject;
888 if (unlikely(!cb)) 888 if (unlikely(!cb))
@@ -913,7 +913,7 @@ err:
913 u32 peer_port = port_peerport(p_ptr); 913 u32 peer_port = port_peerport(p_ptr);
914 u32 peer_node = port_peernode(p_ptr); 914 u32 peer_node = port_peernode(p_ptr);
915 915
916 spin_unlock_bh(p_ptr->publ.lock); 916 tipc_port_unlock(p_ptr);
917 if (!connected || !cb) 917 if (!connected || !cb)
918 break; 918 break;
919 if (msg_origport(msg) != peer_port) 919 if (msg_origport(msg) != peer_port)
@@ -929,7 +929,7 @@ err:
929 case TIPC_DIRECT_MSG:{ 929 case TIPC_DIRECT_MSG:{
930 tipc_msg_err_event cb = up_ptr->err_cb; 930 tipc_msg_err_event cb = up_ptr->err_cb;
931 931
932 spin_unlock_bh(p_ptr->publ.lock); 932 tipc_port_unlock(p_ptr);
933 if (connected || !cb) 933 if (connected || !cb)
934 break; 934 break;
935 skb_pull(buf, msg_hdr_sz(msg)); 935 skb_pull(buf, msg_hdr_sz(msg));
@@ -942,7 +942,7 @@ err:
942 tipc_named_msg_err_event cb = 942 tipc_named_msg_err_event cb =
943 up_ptr->named_err_cb; 943 up_ptr->named_err_cb;
944 944
945 spin_unlock_bh(p_ptr->publ.lock); 945 tipc_port_unlock(p_ptr);
946 if (connected || !cb) 946 if (connected || !cb)
947 break; 947 break;
948 dseq.type = msg_nametype(msg); 948 dseq.type = msg_nametype(msg);
@@ -1107,7 +1107,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance)
1107 if (!p_ptr) 1107 if (!p_ptr)
1108 return -EINVAL; 1108 return -EINVAL;
1109 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); 1109 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
1110 spin_unlock_bh(p_ptr->publ.lock); 1110 tipc_port_unlock(p_ptr);
1111 return TIPC_OK; 1111 return TIPC_OK;
1112} 1112}
1113 1113
@@ -1122,7 +1122,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
1122 if (!p_ptr) 1122 if (!p_ptr)
1123 return -EINVAL; 1123 return -EINVAL;
1124 msg_set_importance(&p_ptr->publ.phdr, (u32)imp); 1124 msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
1125 spin_unlock_bh(p_ptr->publ.lock); 1125 tipc_port_unlock(p_ptr);
1126 return TIPC_OK; 1126 return TIPC_OK;
1127} 1127}
1128 1128
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e36b4b5a5222..24ddfd2ca38b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -201,7 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
201 return -EPROTOTYPE; 201 return -EPROTOTYPE;
202 } 202 }
203 203
204 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); 204 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
205 if (!sk) { 205 if (!sk) {
206 tipc_deleteport(ref); 206 tipc_deleteport(ref);
207 return -ENOMEM; 207 return -ENOMEM;
@@ -253,7 +253,7 @@ static int release(struct socket *sock)
253 dbg("sock_delete: %x\n",tsock); 253 dbg("sock_delete: %x\n",tsock);
254 if (!tsock) 254 if (!tsock)
255 return 0; 255 return 0;
256 down_interruptible(&tsock->sem); 256 down(&tsock->sem);
257 if (!sock->sk) { 257 if (!sock->sk) {
258 up(&tsock->sem); 258 up(&tsock->sem);
259 return 0; 259 return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 9163ec526c2a..eea75888805e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,8 +117,6 @@
117#include <net/checksum.h> 117#include <net/checksum.h>
118#include <linux/security.h> 118#include <linux/security.h>
119 119
120int sysctl_unix_max_dgram_qlen __read_mostly = 10;
121
122static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 120static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123static DEFINE_SPINLOCK(unix_table_lock); 121static DEFINE_SPINLOCK(unix_table_lock);
124static atomic_t unix_nr_socks = ATOMIC_INIT(0); 122static atomic_t unix_nr_socks = ATOMIC_INIT(0);
@@ -127,32 +125,6 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
127 125
128#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) 126#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129 127
130static struct sock *first_unix_socket(int *i)
131{
132 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
133 if (!hlist_empty(&unix_socket_table[*i]))
134 return __sk_head(&unix_socket_table[*i]);
135 }
136 return NULL;
137}
138
139static struct sock *next_unix_socket(int *i, struct sock *s)
140{
141 struct sock *next = sk_next(s);
142 /* More in this chain? */
143 if (next)
144 return next;
145 /* Look for next non-empty chain. */
146 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
147 if (!hlist_empty(&unix_socket_table[*i]))
148 return __sk_head(&unix_socket_table[*i]);
149 }
150 return NULL;
151}
152
153#define forall_unix_sockets(i, s) \
154 for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
155
156#ifdef CONFIG_SECURITY_NETWORK 128#ifdef CONFIG_SECURITY_NETWORK
157static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 129static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ 130{
@@ -270,7 +242,8 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
270 spin_unlock(&unix_table_lock); 242 spin_unlock(&unix_table_lock);
271} 243}
272 244
273static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, 245static struct sock *__unix_find_socket_byname(struct net *net,
246 struct sockaddr_un *sunname,
274 int len, int type, unsigned hash) 247 int len, int type, unsigned hash)
275{ 248{
276 struct sock *s; 249 struct sock *s;
@@ -279,6 +252,9 @@ static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
279 sk_for_each(s, node, &unix_socket_table[hash ^ type]) { 252 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
280 struct unix_sock *u = unix_sk(s); 253 struct unix_sock *u = unix_sk(s);
281 254
255 if (s->sk_net != net)
256 continue;
257
282 if (u->addr->len == len && 258 if (u->addr->len == len &&
283 !memcmp(u->addr->name, sunname, len)) 259 !memcmp(u->addr->name, sunname, len))
284 goto found; 260 goto found;
@@ -288,21 +264,22 @@ found:
288 return s; 264 return s;
289} 265}
290 266
291static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, 267static inline struct sock *unix_find_socket_byname(struct net *net,
268 struct sockaddr_un *sunname,
292 int len, int type, 269 int len, int type,
293 unsigned hash) 270 unsigned hash)
294{ 271{
295 struct sock *s; 272 struct sock *s;
296 273
297 spin_lock(&unix_table_lock); 274 spin_lock(&unix_table_lock);
298 s = __unix_find_socket_byname(sunname, len, type, hash); 275 s = __unix_find_socket_byname(net, sunname, len, type, hash);
299 if (s) 276 if (s)
300 sock_hold(s); 277 sock_hold(s);
301 spin_unlock(&unix_table_lock); 278 spin_unlock(&unix_table_lock);
302 return s; 279 return s;
303} 280}
304 281
305static struct sock *unix_find_socket_byinode(struct inode *i) 282static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
306{ 283{
307 struct sock *s; 284 struct sock *s;
308 struct hlist_node *node; 285 struct hlist_node *node;
@@ -312,6 +289,9 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
312 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 289 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
313 struct dentry *dentry = unix_sk(s)->dentry; 290 struct dentry *dentry = unix_sk(s)->dentry;
314 291
292 if (s->sk_net != net)
293 continue;
294
315 if(dentry && dentry->d_inode == i) 295 if(dentry && dentry->d_inode == i)
316 { 296 {
317 sock_hold(s); 297 sock_hold(s);
@@ -335,7 +315,7 @@ static void unix_write_space(struct sock *sk)
335 if (unix_writable(sk)) { 315 if (unix_writable(sk)) {
336 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 316 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
337 wake_up_interruptible_sync(sk->sk_sleep); 317 wake_up_interruptible_sync(sk->sk_sleep);
338 sk_wake_async(sk, 2, POLL_OUT); 318 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
339 } 319 }
340 read_unlock(&sk->sk_callback_lock); 320 read_unlock(&sk->sk_callback_lock);
341} 321}
@@ -421,7 +401,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
421 unix_state_unlock(skpair); 401 unix_state_unlock(skpair);
422 skpair->sk_state_change(skpair); 402 skpair->sk_state_change(skpair);
423 read_lock(&skpair->sk_callback_lock); 403 read_lock(&skpair->sk_callback_lock);
424 sk_wake_async(skpair,1,POLL_HUP); 404 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
425 read_unlock(&skpair->sk_callback_lock); 405 read_unlock(&skpair->sk_callback_lock);
426 } 406 }
427 sock_put(skpair); /* It may now die */ 407 sock_put(skpair); /* It may now die */
@@ -457,7 +437,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
457 * What the above comment does talk about? --ANK(980817) 437 * What the above comment does talk about? --ANK(980817)
458 */ 438 */
459 439
460 if (atomic_read(&unix_tot_inflight)) 440 if (unix_tot_inflight)
461 unix_gc(); /* Garbage collect fds */ 441 unix_gc(); /* Garbage collect fds */
462 442
463 return 0; 443 return 0;
@@ -599,21 +579,20 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
599 struct sock *sk = NULL; 579 struct sock *sk = NULL;
600 struct unix_sock *u; 580 struct unix_sock *u;
601 581
602 if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) 582 atomic_inc(&unix_nr_socks);
583 if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
603 goto out; 584 goto out;
604 585
605 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); 586 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
606 if (!sk) 587 if (!sk)
607 goto out; 588 goto out;
608 589
609 atomic_inc(&unix_nr_socks);
610
611 sock_init_data(sock,sk); 590 sock_init_data(sock,sk);
612 lockdep_set_class(&sk->sk_receive_queue.lock, 591 lockdep_set_class(&sk->sk_receive_queue.lock,
613 &af_unix_sk_receive_queue_lock_key); 592 &af_unix_sk_receive_queue_lock_key);
614 593
615 sk->sk_write_space = unix_write_space; 594 sk->sk_write_space = unix_write_space;
616 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 595 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
617 sk->sk_destruct = unix_sock_destructor; 596 sk->sk_destruct = unix_sock_destructor;
618 u = unix_sk(sk); 597 u = unix_sk(sk);
619 u->dentry = NULL; 598 u->dentry = NULL;
@@ -625,14 +604,13 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
625 init_waitqueue_head(&u->peer_wait); 604 init_waitqueue_head(&u->peer_wait);
626 unix_insert_socket(unix_sockets_unbound, sk); 605 unix_insert_socket(unix_sockets_unbound, sk);
627out: 606out:
607 if (sk == NULL)
608 atomic_dec(&unix_nr_socks);
628 return sk; 609 return sk;
629} 610}
630 611
631static int unix_create(struct net *net, struct socket *sock, int protocol) 612static int unix_create(struct net *net, struct socket *sock, int protocol)
632{ 613{
633 if (net != &init_net)
634 return -EAFNOSUPPORT;
635
636 if (protocol && protocol != PF_UNIX) 614 if (protocol && protocol != PF_UNIX)
637 return -EPROTONOSUPPORT; 615 return -EPROTONOSUPPORT;
638 616
@@ -676,6 +654,7 @@ static int unix_release(struct socket *sock)
676static int unix_autobind(struct socket *sock) 654static int unix_autobind(struct socket *sock)
677{ 655{
678 struct sock *sk = sock->sk; 656 struct sock *sk = sock->sk;
657 struct net *net = sk->sk_net;
679 struct unix_sock *u = unix_sk(sk); 658 struct unix_sock *u = unix_sk(sk);
680 static u32 ordernum = 1; 659 static u32 ordernum = 1;
681 struct unix_address * addr; 660 struct unix_address * addr;
@@ -702,7 +681,7 @@ retry:
702 spin_lock(&unix_table_lock); 681 spin_lock(&unix_table_lock);
703 ordernum = (ordernum+1)&0xFFFFF; 682 ordernum = (ordernum+1)&0xFFFFF;
704 683
705 if (__unix_find_socket_byname(addr->name, addr->len, sock->type, 684 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
706 addr->hash)) { 685 addr->hash)) {
707 spin_unlock(&unix_table_lock); 686 spin_unlock(&unix_table_lock);
708 /* Sanity yield. It is unusual case, but yet... */ 687 /* Sanity yield. It is unusual case, but yet... */
@@ -722,7 +701,8 @@ out: mutex_unlock(&u->readlock);
722 return err; 701 return err;
723} 702}
724 703
725static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, 704static struct sock *unix_find_other(struct net *net,
705 struct sockaddr_un *sunname, int len,
726 int type, unsigned hash, int *error) 706 int type, unsigned hash, int *error)
727{ 707{
728 struct sock *u; 708 struct sock *u;
@@ -740,7 +720,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
740 err = -ECONNREFUSED; 720 err = -ECONNREFUSED;
741 if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) 721 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
742 goto put_fail; 722 goto put_fail;
743 u=unix_find_socket_byinode(nd.dentry->d_inode); 723 u=unix_find_socket_byinode(net, nd.dentry->d_inode);
744 if (!u) 724 if (!u)
745 goto put_fail; 725 goto put_fail;
746 726
@@ -756,7 +736,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
756 } 736 }
757 } else { 737 } else {
758 err = -ECONNREFUSED; 738 err = -ECONNREFUSED;
759 u=unix_find_socket_byname(sunname, len, type, hash); 739 u=unix_find_socket_byname(net, sunname, len, type, hash);
760 if (u) { 740 if (u) {
761 struct dentry *dentry; 741 struct dentry *dentry;
762 dentry = unix_sk(u)->dentry; 742 dentry = unix_sk(u)->dentry;
@@ -778,6 +758,7 @@ fail:
778static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 758static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
779{ 759{
780 struct sock *sk = sock->sk; 760 struct sock *sk = sock->sk;
761 struct net *net = sk->sk_net;
781 struct unix_sock *u = unix_sk(sk); 762 struct unix_sock *u = unix_sk(sk);
782 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 763 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
783 struct dentry * dentry = NULL; 764 struct dentry * dentry = NULL;
@@ -852,7 +833,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
852 833
853 if (!sunaddr->sun_path[0]) { 834 if (!sunaddr->sun_path[0]) {
854 err = -EADDRINUSE; 835 err = -EADDRINUSE;
855 if (__unix_find_socket_byname(sunaddr, addr_len, 836 if (__unix_find_socket_byname(net, sunaddr, addr_len,
856 sk->sk_type, hash)) { 837 sk->sk_type, hash)) {
857 unix_release_addr(addr); 838 unix_release_addr(addr);
858 goto out_unlock; 839 goto out_unlock;
@@ -918,6 +899,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
918 int alen, int flags) 899 int alen, int flags)
919{ 900{
920 struct sock *sk = sock->sk; 901 struct sock *sk = sock->sk;
902 struct net *net = sk->sk_net;
921 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; 903 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
922 struct sock *other; 904 struct sock *other;
923 unsigned hash; 905 unsigned hash;
@@ -934,7 +916,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
934 goto out; 916 goto out;
935 917
936restart: 918restart:
937 other=unix_find_other(sunaddr, alen, sock->type, hash, &err); 919 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
938 if (!other) 920 if (!other)
939 goto out; 921 goto out;
940 922
@@ -1014,6 +996,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1014{ 996{
1015 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 997 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1016 struct sock *sk = sock->sk; 998 struct sock *sk = sock->sk;
999 struct net *net = sk->sk_net;
1017 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1000 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1018 struct sock *newsk = NULL; 1001 struct sock *newsk = NULL;
1019 struct sock *other = NULL; 1002 struct sock *other = NULL;
@@ -1053,7 +1036,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1053 1036
1054restart: 1037restart:
1055 /* Find listening sock. */ 1038 /* Find listening sock. */
1056 other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); 1039 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1057 if (!other) 1040 if (!other)
1058 goto out; 1041 goto out;
1059 1042
@@ -1329,6 +1312,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1329{ 1312{
1330 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1313 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1331 struct sock *sk = sock->sk; 1314 struct sock *sk = sock->sk;
1315 struct net *net = sk->sk_net;
1332 struct unix_sock *u = unix_sk(sk); 1316 struct unix_sock *u = unix_sk(sk);
1333 struct sockaddr_un *sunaddr=msg->msg_name; 1317 struct sockaddr_un *sunaddr=msg->msg_name;
1334 struct sock *other = NULL; 1318 struct sock *other = NULL;
@@ -1392,7 +1376,7 @@ restart:
1392 if (sunaddr == NULL) 1376 if (sunaddr == NULL)
1393 goto out_free; 1377 goto out_free;
1394 1378
1395 other = unix_find_other(sunaddr, namelen, sk->sk_type, 1379 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1396 hash, &err); 1380 hash, &err);
1397 if (other==NULL) 1381 if (other==NULL)
1398 goto out_free; 1382 goto out_free;
@@ -1636,8 +1620,15 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1636 mutex_lock(&u->readlock); 1620 mutex_lock(&u->readlock);
1637 1621
1638 skb = skb_recv_datagram(sk, flags, noblock, &err); 1622 skb = skb_recv_datagram(sk, flags, noblock, &err);
1639 if (!skb) 1623 if (!skb) {
1624 unix_state_lock(sk);
1625 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1626 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1627 (sk->sk_shutdown & RCV_SHUTDOWN))
1628 err = 0;
1629 unix_state_unlock(sk);
1640 goto out_unlock; 1630 goto out_unlock;
1631 }
1641 1632
1642 wake_up_interruptible_sync(&u->peer_wait); 1633 wake_up_interruptible_sync(&u->peer_wait);
1643 1634
@@ -1907,9 +1898,9 @@ static int unix_shutdown(struct socket *sock, int mode)
1907 other->sk_state_change(other); 1898 other->sk_state_change(other);
1908 read_lock(&other->sk_callback_lock); 1899 read_lock(&other->sk_callback_lock);
1909 if (peer_mode == SHUTDOWN_MASK) 1900 if (peer_mode == SHUTDOWN_MASK)
1910 sk_wake_async(other,1,POLL_HUP); 1901 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1911 else if (peer_mode & RCV_SHUTDOWN) 1902 else if (peer_mode & RCV_SHUTDOWN)
1912 sk_wake_async(other,1,POLL_IN); 1903 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1913 read_unlock(&other->sk_callback_lock); 1904 read_unlock(&other->sk_callback_lock);
1914 } 1905 }
1915 if (other) 1906 if (other)
@@ -1998,12 +1989,41 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
1998 1989
1999 1990
2000#ifdef CONFIG_PROC_FS 1991#ifdef CONFIG_PROC_FS
2001static struct sock *unix_seq_idx(int *iter, loff_t pos) 1992static struct sock *first_unix_socket(int *i)
1993{
1994 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1995 if (!hlist_empty(&unix_socket_table[*i]))
1996 return __sk_head(&unix_socket_table[*i]);
1997 }
1998 return NULL;
1999}
2000
2001static struct sock *next_unix_socket(int *i, struct sock *s)
2002{
2003 struct sock *next = sk_next(s);
2004 /* More in this chain? */
2005 if (next)
2006 return next;
2007 /* Look for next non-empty chain. */
2008 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2009 if (!hlist_empty(&unix_socket_table[*i]))
2010 return __sk_head(&unix_socket_table[*i]);
2011 }
2012 return NULL;
2013}
2014
2015struct unix_iter_state {
2016 struct seq_net_private p;
2017 int i;
2018};
2019static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
2002{ 2020{
2003 loff_t off = 0; 2021 loff_t off = 0;
2004 struct sock *s; 2022 struct sock *s;
2005 2023
2006 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { 2024 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2025 if (s->sk_net != iter->p.net)
2026 continue;
2007 if (off == pos) 2027 if (off == pos)
2008 return s; 2028 return s;
2009 ++off; 2029 ++off;
@@ -2013,21 +2033,30 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos)
2013 2033
2014 2034
2015static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 2035static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2036 __acquires(unix_table_lock)
2016{ 2037{
2038 struct unix_iter_state *iter = seq->private;
2017 spin_lock(&unix_table_lock); 2039 spin_lock(&unix_table_lock);
2018 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 2040 return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
2019} 2041}
2020 2042
2021static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2043static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2022{ 2044{
2045 struct unix_iter_state *iter = seq->private;
2046 struct sock *sk = v;
2023 ++*pos; 2047 ++*pos;
2024 2048
2025 if (v == (void *)1) 2049 if (v == (void *)1)
2026 return first_unix_socket(seq->private); 2050 sk = first_unix_socket(&iter->i);
2027 return next_unix_socket(seq->private, v); 2051 else
2052 sk = next_unix_socket(&iter->i, sk);
2053 while (sk && (sk->sk_net != iter->p.net))
2054 sk = next_unix_socket(&iter->i, sk);
2055 return sk;
2028} 2056}
2029 2057
2030static void unix_seq_stop(struct seq_file *seq, void *v) 2058static void unix_seq_stop(struct seq_file *seq, void *v)
2059 __releases(unix_table_lock)
2031{ 2060{
2032 spin_unlock(&unix_table_lock); 2061 spin_unlock(&unix_table_lock);
2033} 2062}
@@ -2086,7 +2115,8 @@ static const struct seq_operations unix_seq_ops = {
2086 2115
2087static int unix_seq_open(struct inode *inode, struct file *file) 2116static int unix_seq_open(struct inode *inode, struct file *file)
2088{ 2117{
2089 return seq_open_private(file, &unix_seq_ops, sizeof(int)); 2118 return seq_open_net(inode, file, &unix_seq_ops,
2119 sizeof(struct unix_iter_state));
2090} 2120}
2091 2121
2092static const struct file_operations unix_seq_fops = { 2122static const struct file_operations unix_seq_fops = {
@@ -2094,7 +2124,7 @@ static const struct file_operations unix_seq_fops = {
2094 .open = unix_seq_open, 2124 .open = unix_seq_open,
2095 .read = seq_read, 2125 .read = seq_read,
2096 .llseek = seq_lseek, 2126 .llseek = seq_lseek,
2097 .release = seq_release_private, 2127 .release = seq_release_net,
2098}; 2128};
2099 2129
2100#endif 2130#endif
@@ -2105,6 +2135,37 @@ static struct net_proto_family unix_family_ops = {
2105 .owner = THIS_MODULE, 2135 .owner = THIS_MODULE,
2106}; 2136};
2107 2137
2138
2139static int unix_net_init(struct net *net)
2140{
2141 int error = -ENOMEM;
2142
2143 net->unx.sysctl_max_dgram_qlen = 10;
2144 if (unix_sysctl_register(net))
2145 goto out;
2146
2147#ifdef CONFIG_PROC_FS
2148 if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2149 unix_sysctl_unregister(net);
2150 goto out;
2151 }
2152#endif
2153 error = 0;
2154out:
2155 return 0;
2156}
2157
2158static void unix_net_exit(struct net *net)
2159{
2160 unix_sysctl_unregister(net);
2161 proc_net_remove(net, "unix");
2162}
2163
2164static struct pernet_operations unix_net_ops = {
2165 .init = unix_net_init,
2166 .exit = unix_net_exit,
2167};
2168
2108static int __init af_unix_init(void) 2169static int __init af_unix_init(void)
2109{ 2170{
2110 int rc = -1; 2171 int rc = -1;
@@ -2120,10 +2181,7 @@ static int __init af_unix_init(void)
2120 } 2181 }
2121 2182
2122 sock_register(&unix_family_ops); 2183 sock_register(&unix_family_ops);
2123#ifdef CONFIG_PROC_FS 2184 register_pernet_subsys(&unix_net_ops);
2124 proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops);
2125#endif
2126 unix_sysctl_register();
2127out: 2185out:
2128 return rc; 2186 return rc;
2129} 2187}
@@ -2131,9 +2189,8 @@ out:
2131static void __exit af_unix_exit(void) 2189static void __exit af_unix_exit(void)
2132{ 2190{
2133 sock_unregister(PF_UNIX); 2191 sock_unregister(PF_UNIX);
2134 unix_sysctl_unregister();
2135 proc_net_remove(&init_net, "unix");
2136 proto_unregister(&unix_proto); 2192 proto_unregister(&unix_proto);
2193 unregister_pernet_subsys(&unix_net_ops);
2137} 2194}
2138 2195
2139module_init(af_unix_init); 2196module_init(af_unix_init);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 406b6433e467..ebdff3d877a1 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -92,7 +92,7 @@ static LIST_HEAD(gc_inflight_list);
92static LIST_HEAD(gc_candidates); 92static LIST_HEAD(gc_candidates);
93static DEFINE_SPINLOCK(unix_gc_lock); 93static DEFINE_SPINLOCK(unix_gc_lock);
94 94
95atomic_t unix_tot_inflight = ATOMIC_INIT(0); 95unsigned int unix_tot_inflight;
96 96
97 97
98static struct sock *unix_get_socket(struct file *filp) 98static struct sock *unix_get_socket(struct file *filp)
@@ -133,7 +133,7 @@ void unix_inflight(struct file *fp)
133 } else { 133 } else {
134 BUG_ON(list_empty(&u->link)); 134 BUG_ON(list_empty(&u->link));
135 } 135 }
136 atomic_inc(&unix_tot_inflight); 136 unix_tot_inflight++;
137 spin_unlock(&unix_gc_lock); 137 spin_unlock(&unix_gc_lock);
138 } 138 }
139} 139}
@@ -147,7 +147,7 @@ void unix_notinflight(struct file *fp)
147 BUG_ON(list_empty(&u->link)); 147 BUG_ON(list_empty(&u->link));
148 if (atomic_dec_and_test(&u->inflight)) 148 if (atomic_dec_and_test(&u->inflight))
149 list_del_init(&u->link); 149 list_del_init(&u->link);
150 atomic_dec(&unix_tot_inflight); 150 unix_tot_inflight--;
151 spin_unlock(&unix_gc_lock); 151 spin_unlock(&unix_gc_lock);
152 } 152 }
153} 153}
@@ -161,7 +161,7 @@ static inline struct sk_buff *sock_queue_head(struct sock *sk)
161 for (skb = sock_queue_head(sk)->next, next = skb->next; \ 161 for (skb = sock_queue_head(sk)->next, next = skb->next; \
162 skb != sock_queue_head(sk); skb = next, next = skb->next) 162 skb != sock_queue_head(sk); skb = next, next = skb->next)
163 163
164static void scan_inflight(struct sock *x, void (*func)(struct sock *), 164static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
165 struct sk_buff_head *hitlist) 165 struct sk_buff_head *hitlist)
166{ 166{
167 struct sk_buff *skb; 167 struct sk_buff *skb;
@@ -185,9 +185,9 @@ static void scan_inflight(struct sock *x, void (*func)(struct sock *),
185 * if it indeed does so 185 * if it indeed does so
186 */ 186 */
187 struct sock *sk = unix_get_socket(*fp++); 187 struct sock *sk = unix_get_socket(*fp++);
188 if(sk) { 188 if (sk) {
189 hit = true; 189 hit = true;
190 func(sk); 190 func(unix_sk(sk));
191 } 191 }
192 } 192 }
193 if (hit && hitlist != NULL) { 193 if (hit && hitlist != NULL) {
@@ -199,7 +199,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct sock *),
199 spin_unlock(&x->sk_receive_queue.lock); 199 spin_unlock(&x->sk_receive_queue.lock);
200} 200}
201 201
202static void scan_children(struct sock *x, void (*func)(struct sock *), 202static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
203 struct sk_buff_head *hitlist) 203 struct sk_buff_head *hitlist)
204{ 204{
205 if (x->sk_state != TCP_LISTEN) 205 if (x->sk_state != TCP_LISTEN)
@@ -235,20 +235,18 @@ static void scan_children(struct sock *x, void (*func)(struct sock *),
235 } 235 }
236} 236}
237 237
238static void dec_inflight(struct sock *sk) 238static void dec_inflight(struct unix_sock *usk)
239{ 239{
240 atomic_dec(&unix_sk(sk)->inflight); 240 atomic_dec(&usk->inflight);
241} 241}
242 242
243static void inc_inflight(struct sock *sk) 243static void inc_inflight(struct unix_sock *usk)
244{ 244{
245 atomic_inc(&unix_sk(sk)->inflight); 245 atomic_inc(&usk->inflight);
246} 246}
247 247
248static void inc_inflight_move_tail(struct sock *sk) 248static void inc_inflight_move_tail(struct unix_sock *u)
249{ 249{
250 struct unix_sock *u = unix_sk(sk);
251
252 atomic_inc(&u->inflight); 250 atomic_inc(&u->inflight);
253 /* 251 /*
254 * If this is still a candidate, move it to the end of the 252 * If this is still a candidate, move it to the end of the
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index eb0bd57ebada..77513d7e35f2 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -18,7 +18,7 @@ static ctl_table unix_table[] = {
18 { 18 {
19 .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, 19 .ctl_name = NET_UNIX_MAX_DGRAM_QLEN,
20 .procname = "max_dgram_qlen", 20 .procname = "max_dgram_qlen",
21 .data = &sysctl_unix_max_dgram_qlen, 21 .data = &init_net.unx.sysctl_max_dgram_qlen,
22 .maxlen = sizeof(int), 22 .maxlen = sizeof(int),
23 .mode = 0644, 23 .mode = 0644,
24 .proc_handler = &proc_dointvec 24 .proc_handler = &proc_dointvec
@@ -26,35 +26,39 @@ static ctl_table unix_table[] = {
26 { .ctl_name = 0 } 26 { .ctl_name = 0 }
27}; 27};
28 28
29static ctl_table unix_net_table[] = { 29static struct ctl_path unix_path[] = {
30 { 30 { .procname = "net", .ctl_name = CTL_NET, },
31 .ctl_name = NET_UNIX, 31 { .procname = "unix", .ctl_name = NET_UNIX, },
32 .procname = "unix", 32 { },
33 .mode = 0555,
34 .child = unix_table
35 },
36 { .ctl_name = 0 }
37}; 33};
38 34
39static ctl_table unix_root_table[] = { 35int unix_sysctl_register(struct net *net)
40 { 36{
41 .ctl_name = CTL_NET, 37 struct ctl_table *table;
42 .procname = "net",
43 .mode = 0555,
44 .child = unix_net_table
45 },
46 { .ctl_name = 0 }
47};
48 38
49static struct ctl_table_header * unix_sysctl_header; 39 table = kmemdup(unix_table, sizeof(unix_table), GFP_KERNEL);
40 if (table == NULL)
41 goto err_alloc;
50 42
51void unix_sysctl_register(void) 43 table[0].data = &net->unx.sysctl_max_dgram_qlen;
52{ 44 net->unx.ctl = register_net_sysctl_table(net, unix_path, table);
53 unix_sysctl_header = register_sysctl_table(unix_root_table); 45 if (net->unx.ctl == NULL)
46 goto err_reg;
47
48 return 0;
49
50err_reg:
51 kfree(table);
52err_alloc:
53 return -ENOMEM;
54} 54}
55 55
56void unix_sysctl_unregister(void) 56void unix_sysctl_unregister(struct net *net)
57{ 57{
58 unregister_sysctl_table(unix_sysctl_header); 58 struct ctl_table *table;
59
60 table = net->unx.ctl->ctl_table_arg;
61 unregister_sysctl_table(net->unx.ctl);
62 kfree(table);
59} 63}
60 64
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6426055a8be0..79270903bda6 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -6,13 +6,13 @@ config NL80211
6 depends on CFG80211 6 depends on CFG80211
7 default y 7 default y
8 ---help--- 8 ---help---
9 This option turns on the new netlink interface 9 This option turns on the new netlink interface
10 (nl80211) support in cfg80211. 10 (nl80211) support in cfg80211.
11 11
12 If =n, drivers using mac80211 will be configured via 12 If =n, drivers using mac80211 will be configured via
13 wireless extension support provided by that subsystem. 13 wireless extension support provided by that subsystem.
14 14
15 If unsure, say Y. 15 If unsure, say Y.
16 16
17config WIRELESS_EXT 17config WIRELESS_EXT
18 bool "Wireless extensions" 18 bool "Wireless extensions"
diff --git a/net/wireless/core.c b/net/wireless/core.c
index febc33bc9c09..cfc5fc5f9e75 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -184,6 +184,9 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
184 struct cfg80211_registered_device *drv; 184 struct cfg80211_registered_device *drv;
185 int alloc_size; 185 int alloc_size;
186 186
187 WARN_ON(!ops->add_key && ops->del_key);
188 WARN_ON(ops->add_key && !ops->del_key);
189
187 alloc_size = sizeof(*drv) + sizeof_priv; 190 alloc_size = sizeof(*drv) + sizeof_priv;
188 191
189 drv = kzalloc(alloc_size, GFP_KERNEL); 192 drv = kzalloc(alloc_size, GFP_KERNEL);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 48b0d453e4e1..e3a214f63f91 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -61,6 +61,27 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
61 [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, 61 [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
62 [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, 62 [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
63 [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, 63 [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
64
65 [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN },
66
67 [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
68 .len = WLAN_MAX_KEY_LEN },
69 [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
70 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
71 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
72
73 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
74 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
75 [NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
76 .len = IEEE80211_MAX_DATA_LEN },
77 [NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
78 .len = IEEE80211_MAX_DATA_LEN },
79 [NL80211_ATTR_STA_AID] = { .type = NLA_U16 },
80 [NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED },
81 [NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 },
82 [NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY,
83 .len = NL80211_MAX_SUPP_RATES },
84 [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 },
64}; 85};
65 86
66/* message building helper */ 87/* message building helper */
@@ -335,6 +356,655 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
335 return err; 356 return err;
336} 357}
337 358
359struct get_key_cookie {
360 struct sk_buff *msg;
361 int error;
362};
363
364static void get_key_callback(void *c, struct key_params *params)
365{
366 struct get_key_cookie *cookie = c;
367
368 if (params->key)
369 NLA_PUT(cookie->msg, NL80211_ATTR_KEY_DATA,
370 params->key_len, params->key);
371
372 if (params->seq)
373 NLA_PUT(cookie->msg, NL80211_ATTR_KEY_SEQ,
374 params->seq_len, params->seq);
375
376 if (params->cipher)
377 NLA_PUT_U32(cookie->msg, NL80211_ATTR_KEY_CIPHER,
378 params->cipher);
379
380 return;
381 nla_put_failure:
382 cookie->error = 1;
383}
384
385static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
386{
387 struct cfg80211_registered_device *drv;
388 int err;
389 struct net_device *dev;
390 u8 key_idx = 0;
391 u8 *mac_addr = NULL;
392 struct get_key_cookie cookie = {
393 .error = 0,
394 };
395 void *hdr;
396 struct sk_buff *msg;
397
398 if (info->attrs[NL80211_ATTR_KEY_IDX])
399 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
400
401 if (key_idx > 3)
402 return -EINVAL;
403
404 if (info->attrs[NL80211_ATTR_MAC])
405 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
406
407 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
408 if (err)
409 return err;
410
411 if (!drv->ops->get_key) {
412 err = -EOPNOTSUPP;
413 goto out;
414 }
415
416 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
417 if (!msg) {
418 err = -ENOMEM;
419 goto out;
420 }
421
422 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
423 NL80211_CMD_NEW_KEY);
424
425 if (IS_ERR(hdr)) {
426 err = PTR_ERR(hdr);
427 goto out;
428 }
429
430 cookie.msg = msg;
431
432 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
433 NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_idx);
434 if (mac_addr)
435 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
436
437 rtnl_lock();
438 err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr,
439 &cookie, get_key_callback);
440 rtnl_unlock();
441
442 if (err)
443 goto out;
444
445 if (cookie.error)
446 goto nla_put_failure;
447
448 genlmsg_end(msg, hdr);
449 err = genlmsg_unicast(msg, info->snd_pid);
450 goto out;
451
452 nla_put_failure:
453 err = -ENOBUFS;
454 nlmsg_free(msg);
455 out:
456 cfg80211_put_dev(drv);
457 dev_put(dev);
458 return err;
459}
460
461static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
462{
463 struct cfg80211_registered_device *drv;
464 int err;
465 struct net_device *dev;
466 u8 key_idx;
467
468 if (!info->attrs[NL80211_ATTR_KEY_IDX])
469 return -EINVAL;
470
471 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
472
473 if (key_idx > 3)
474 return -EINVAL;
475
476 /* currently only support setting default key */
477 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT])
478 return -EINVAL;
479
480 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
481 if (err)
482 return err;
483
484 if (!drv->ops->set_default_key) {
485 err = -EOPNOTSUPP;
486 goto out;
487 }
488
489 rtnl_lock();
490 err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx);
491 rtnl_unlock();
492
493 out:
494 cfg80211_put_dev(drv);
495 dev_put(dev);
496 return err;
497}
498
499static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
500{
501 struct cfg80211_registered_device *drv;
502 int err;
503 struct net_device *dev;
504 struct key_params params;
505 u8 key_idx = 0;
506 u8 *mac_addr = NULL;
507
508 memset(&params, 0, sizeof(params));
509
510 if (!info->attrs[NL80211_ATTR_KEY_CIPHER])
511 return -EINVAL;
512
513 if (info->attrs[NL80211_ATTR_KEY_DATA]) {
514 params.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]);
515 params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]);
516 }
517
518 if (info->attrs[NL80211_ATTR_KEY_IDX])
519 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
520
521 params.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]);
522
523 if (info->attrs[NL80211_ATTR_MAC])
524 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
525
526 if (key_idx > 3)
527 return -EINVAL;
528
529 /*
530 * Disallow pairwise keys with non-zero index unless it's WEP
531 * (because current deployments use pairwise WEP keys with
532 * non-zero indizes but 802.11i clearly specifies to use zero)
533 */
534 if (mac_addr && key_idx &&
535 params.cipher != WLAN_CIPHER_SUITE_WEP40 &&
536 params.cipher != WLAN_CIPHER_SUITE_WEP104)
537 return -EINVAL;
538
539 /* TODO: add definitions for the lengths to linux/ieee80211.h */
540 switch (params.cipher) {
541 case WLAN_CIPHER_SUITE_WEP40:
542 if (params.key_len != 5)
543 return -EINVAL;
544 break;
545 case WLAN_CIPHER_SUITE_TKIP:
546 if (params.key_len != 32)
547 return -EINVAL;
548 break;
549 case WLAN_CIPHER_SUITE_CCMP:
550 if (params.key_len != 16)
551 return -EINVAL;
552 break;
553 case WLAN_CIPHER_SUITE_WEP104:
554 if (params.key_len != 13)
555 return -EINVAL;
556 break;
557 default:
558 return -EINVAL;
559 }
560
561 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
562 if (err)
563 return err;
564
565 if (!drv->ops->add_key) {
566 err = -EOPNOTSUPP;
567 goto out;
568 }
569
570 rtnl_lock();
571 err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, &params);
572 rtnl_unlock();
573
574 out:
575 cfg80211_put_dev(drv);
576 dev_put(dev);
577 return err;
578}
579
580static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
581{
582 struct cfg80211_registered_device *drv;
583 int err;
584 struct net_device *dev;
585 u8 key_idx = 0;
586 u8 *mac_addr = NULL;
587
588 if (info->attrs[NL80211_ATTR_KEY_IDX])
589 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
590
591 if (key_idx > 3)
592 return -EINVAL;
593
594 if (info->attrs[NL80211_ATTR_MAC])
595 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
596
597 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
598 if (err)
599 return err;
600
601 if (!drv->ops->del_key) {
602 err = -EOPNOTSUPP;
603 goto out;
604 }
605
606 rtnl_lock();
607 err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr);
608 rtnl_unlock();
609
610 out:
611 cfg80211_put_dev(drv);
612 dev_put(dev);
613 return err;
614}
615
616static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
617{
618 int (*call)(struct wiphy *wiphy, struct net_device *dev,
619 struct beacon_parameters *info);
620 struct cfg80211_registered_device *drv;
621 int err;
622 struct net_device *dev;
623 struct beacon_parameters params;
624 int haveinfo = 0;
625
626 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
627 if (err)
628 return err;
629
630 switch (info->genlhdr->cmd) {
631 case NL80211_CMD_NEW_BEACON:
632 /* these are required for NEW_BEACON */
633 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
634 !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
635 !info->attrs[NL80211_ATTR_BEACON_HEAD]) {
636 err = -EINVAL;
637 goto out;
638 }
639
640 call = drv->ops->add_beacon;
641 break;
642 case NL80211_CMD_SET_BEACON:
643 call = drv->ops->set_beacon;
644 break;
645 default:
646 WARN_ON(1);
647 err = -EOPNOTSUPP;
648 goto out;
649 }
650
651 if (!call) {
652 err = -EOPNOTSUPP;
653 goto out;
654 }
655
656 memset(&params, 0, sizeof(params));
657
658 if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
659 params.interval =
660 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
661 haveinfo = 1;
662 }
663
664 if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
665 params.dtim_period =
666 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
667 haveinfo = 1;
668 }
669
670 if (info->attrs[NL80211_ATTR_BEACON_HEAD]) {
671 params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]);
672 params.head_len =
673 nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]);
674 haveinfo = 1;
675 }
676
677 if (info->attrs[NL80211_ATTR_BEACON_TAIL]) {
678 params.tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]);
679 params.tail_len =
680 nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]);
681 haveinfo = 1;
682 }
683
684 if (!haveinfo) {
685 err = -EINVAL;
686 goto out;
687 }
688
689 rtnl_lock();
690 err = call(&drv->wiphy, dev, &params);
691 rtnl_unlock();
692
693 out:
694 cfg80211_put_dev(drv);
695 dev_put(dev);
696 return err;
697}
698
699static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
700{
701 struct cfg80211_registered_device *drv;
702 int err;
703 struct net_device *dev;
704
705 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
706 if (err)
707 return err;
708
709 if (!drv->ops->del_beacon) {
710 err = -EOPNOTSUPP;
711 goto out;
712 }
713
714 rtnl_lock();
715 err = drv->ops->del_beacon(&drv->wiphy, dev);
716 rtnl_unlock();
717
718 out:
719 cfg80211_put_dev(drv);
720 dev_put(dev);
721 return err;
722}
723
724static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
725 [NL80211_STA_FLAG_AUTHORIZED] = { .type = NLA_FLAG },
726 [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG },
727 [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG },
728};
729
730static int parse_station_flags(struct nlattr *nla, u32 *staflags)
731{
732 struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
733 int flag;
734
735 *staflags = 0;
736
737 if (!nla)
738 return 0;
739
740 if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX,
741 nla, sta_flags_policy))
742 return -EINVAL;
743
744 *staflags = STATION_FLAG_CHANGED;
745
746 for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++)
747 if (flags[flag])
748 *staflags |= (1<<flag);
749
750 return 0;
751}
752
753static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
754 int flags, struct net_device *dev,
755 u8 *mac_addr, struct station_stats *stats)
756{
757 void *hdr;
758 struct nlattr *statsattr;
759
760 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
761 if (!hdr)
762 return -1;
763
764 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
765 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
766
767 statsattr = nla_nest_start(msg, NL80211_ATTR_STA_STATS);
768 if (!statsattr)
769 goto nla_put_failure;
770 if (stats->filled & STATION_STAT_INACTIVE_TIME)
771 NLA_PUT_U32(msg, NL80211_STA_STAT_INACTIVE_TIME,
772 stats->inactive_time);
773 if (stats->filled & STATION_STAT_RX_BYTES)
774 NLA_PUT_U32(msg, NL80211_STA_STAT_RX_BYTES,
775 stats->rx_bytes);
776 if (stats->filled & STATION_STAT_TX_BYTES)
777 NLA_PUT_U32(msg, NL80211_STA_STAT_TX_BYTES,
778 stats->tx_bytes);
779
780 nla_nest_end(msg, statsattr);
781
782 return genlmsg_end(msg, hdr);
783
784 nla_put_failure:
785 return genlmsg_cancel(msg, hdr);
786}
787
788
789static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
790{
791 struct cfg80211_registered_device *drv;
792 int err;
793 struct net_device *dev;
794 struct station_stats stats;
795 struct sk_buff *msg;
796 u8 *mac_addr = NULL;
797
798 memset(&stats, 0, sizeof(stats));
799
800 if (!info->attrs[NL80211_ATTR_MAC])
801 return -EINVAL;
802
803 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
804
805 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
806 if (err)
807 return err;
808
809 if (!drv->ops->get_station) {
810 err = -EOPNOTSUPP;
811 goto out;
812 }
813
814 rtnl_lock();
815 err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &stats);
816 rtnl_unlock();
817
818 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
819 if (!msg)
820 goto out;
821
822 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
823 dev, mac_addr, &stats) < 0)
824 goto out_free;
825
826 err = genlmsg_unicast(msg, info->snd_pid);
827 goto out;
828
829 out_free:
830 nlmsg_free(msg);
831
832 out:
833 cfg80211_put_dev(drv);
834 dev_put(dev);
835 return err;
836}
837
838/*
839 * Get vlan interface making sure it is on the right wiphy.
840 */
841static int get_vlan(struct nlattr *vlanattr,
842 struct cfg80211_registered_device *rdev,
843 struct net_device **vlan)
844{
845 *vlan = NULL;
846
847 if (vlanattr) {
848 *vlan = dev_get_by_index(&init_net, nla_get_u32(vlanattr));
849 if (!*vlan)
850 return -ENODEV;
851 if (!(*vlan)->ieee80211_ptr)
852 return -EINVAL;
853 if ((*vlan)->ieee80211_ptr->wiphy != &rdev->wiphy)
854 return -EINVAL;
855 }
856 return 0;
857}
858
859static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
860{
861 struct cfg80211_registered_device *drv;
862 int err;
863 struct net_device *dev;
864 struct station_parameters params;
865 u8 *mac_addr = NULL;
866
867 memset(&params, 0, sizeof(params));
868
869 params.listen_interval = -1;
870
871 if (info->attrs[NL80211_ATTR_STA_AID])
872 return -EINVAL;
873
874 if (!info->attrs[NL80211_ATTR_MAC])
875 return -EINVAL;
876
877 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
878
879 if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
880 params.supported_rates =
881 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
882 params.supported_rates_len =
883 nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
884 }
885
886 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
887 params.listen_interval =
888 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
889
890 if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
891 &params.station_flags))
892 return -EINVAL;
893
894 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
895 if (err)
896 return err;
897
898 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
899 if (err)
900 goto out;
901
902 if (!drv->ops->change_station) {
903 err = -EOPNOTSUPP;
904 goto out;
905 }
906
907 rtnl_lock();
908 err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, &params);
909 rtnl_unlock();
910
911 out:
912 if (params.vlan)
913 dev_put(params.vlan);
914 cfg80211_put_dev(drv);
915 dev_put(dev);
916 return err;
917}
918
919static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
920{
921 struct cfg80211_registered_device *drv;
922 int err;
923 struct net_device *dev;
924 struct station_parameters params;
925 u8 *mac_addr = NULL;
926
927 memset(&params, 0, sizeof(params));
928
929 if (!info->attrs[NL80211_ATTR_MAC])
930 return -EINVAL;
931
932 if (!info->attrs[NL80211_ATTR_STA_AID])
933 return -EINVAL;
934
935 if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
936 return -EINVAL;
937
938 if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
939 return -EINVAL;
940
941 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
942 params.supported_rates =
943 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
944 params.supported_rates_len =
945 nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
946 params.listen_interval =
947 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
948 params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
949
950 if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
951 &params.station_flags))
952 return -EINVAL;
953
954 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
955 if (err)
956 return err;
957
958 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
959 if (err)
960 goto out;
961
962 if (!drv->ops->add_station) {
963 err = -EOPNOTSUPP;
964 goto out;
965 }
966
967 rtnl_lock();
968 err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, &params);
969 rtnl_unlock();
970
971 out:
972 if (params.vlan)
973 dev_put(params.vlan);
974 cfg80211_put_dev(drv);
975 dev_put(dev);
976 return err;
977}
978
979static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
980{
981 struct cfg80211_registered_device *drv;
982 int err;
983 struct net_device *dev;
984 u8 *mac_addr = NULL;
985
986 if (info->attrs[NL80211_ATTR_MAC])
987 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
988
989 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
990 if (err)
991 return err;
992
993 if (!drv->ops->del_station) {
994 err = -EOPNOTSUPP;
995 goto out;
996 }
997
998 rtnl_lock();
999 err = drv->ops->del_station(&drv->wiphy, dev, mac_addr);
1000 rtnl_unlock();
1001
1002 out:
1003 cfg80211_put_dev(drv);
1004 dev_put(dev);
1005 return err;
1006}
1007
338static struct genl_ops nl80211_ops[] = { 1008static struct genl_ops nl80211_ops[] = {
339 { 1009 {
340 .cmd = NL80211_CMD_GET_WIPHY, 1010 .cmd = NL80211_CMD_GET_WIPHY,
@@ -374,6 +1044,73 @@ static struct genl_ops nl80211_ops[] = {
374 .policy = nl80211_policy, 1044 .policy = nl80211_policy,
375 .flags = GENL_ADMIN_PERM, 1045 .flags = GENL_ADMIN_PERM,
376 }, 1046 },
1047 {
1048 .cmd = NL80211_CMD_GET_KEY,
1049 .doit = nl80211_get_key,
1050 .policy = nl80211_policy,
1051 .flags = GENL_ADMIN_PERM,
1052 },
1053 {
1054 .cmd = NL80211_CMD_SET_KEY,
1055 .doit = nl80211_set_key,
1056 .policy = nl80211_policy,
1057 .flags = GENL_ADMIN_PERM,
1058 },
1059 {
1060 .cmd = NL80211_CMD_NEW_KEY,
1061 .doit = nl80211_new_key,
1062 .policy = nl80211_policy,
1063 .flags = GENL_ADMIN_PERM,
1064 },
1065 {
1066 .cmd = NL80211_CMD_DEL_KEY,
1067 .doit = nl80211_del_key,
1068 .policy = nl80211_policy,
1069 .flags = GENL_ADMIN_PERM,
1070 },
1071 {
1072 .cmd = NL80211_CMD_SET_BEACON,
1073 .policy = nl80211_policy,
1074 .flags = GENL_ADMIN_PERM,
1075 .doit = nl80211_addset_beacon,
1076 },
1077 {
1078 .cmd = NL80211_CMD_NEW_BEACON,
1079 .policy = nl80211_policy,
1080 .flags = GENL_ADMIN_PERM,
1081 .doit = nl80211_addset_beacon,
1082 },
1083 {
1084 .cmd = NL80211_CMD_DEL_BEACON,
1085 .policy = nl80211_policy,
1086 .flags = GENL_ADMIN_PERM,
1087 .doit = nl80211_del_beacon,
1088 },
1089 {
1090 .cmd = NL80211_CMD_GET_STATION,
1091 .doit = nl80211_get_station,
1092 /* TODO: implement dumpit */
1093 .policy = nl80211_policy,
1094 .flags = GENL_ADMIN_PERM,
1095 },
1096 {
1097 .cmd = NL80211_CMD_SET_STATION,
1098 .doit = nl80211_set_station,
1099 .policy = nl80211_policy,
1100 .flags = GENL_ADMIN_PERM,
1101 },
1102 {
1103 .cmd = NL80211_CMD_NEW_STATION,
1104 .doit = nl80211_new_station,
1105 .policy = nl80211_policy,
1106 .flags = GENL_ADMIN_PERM,
1107 },
1108 {
1109 .cmd = NL80211_CMD_DEL_STATION,
1110 .doit = nl80211_del_station,
1111 .policy = nl80211_policy,
1112 .flags = GENL_ADMIN_PERM,
1113 },
377}; 1114};
378 1115
379/* multicast groups */ 1116/* multicast groups */
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 85e5f9dd0d8e..2c569b63e7d8 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -417,20 +417,6 @@ static const int event_type_size[] = {
417 IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ 417 IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */
418}; 418};
419 419
420/* Size (in bytes) of various events, as packed */
421static const int event_type_pk_size[] = {
422 IW_EV_LCP_PK_LEN, /* IW_HEADER_TYPE_NULL */
423 0,
424 IW_EV_CHAR_PK_LEN, /* IW_HEADER_TYPE_CHAR */
425 0,
426 IW_EV_UINT_PK_LEN, /* IW_HEADER_TYPE_UINT */
427 IW_EV_FREQ_PK_LEN, /* IW_HEADER_TYPE_FREQ */
428 IW_EV_ADDR_PK_LEN, /* IW_HEADER_TYPE_ADDR */
429 0,
430 IW_EV_POINT_PK_LEN, /* Without variable payload */
431 IW_EV_PARAM_PK_LEN, /* IW_HEADER_TYPE_PARAM */
432 IW_EV_QUAL_PK_LEN, /* IW_HEADER_TYPE_QUAL */
433};
434 420
435/************************ COMMON SUBROUTINES ************************/ 421/************************ COMMON SUBROUTINES ************************/
436/* 422/*
@@ -673,26 +659,8 @@ static const struct seq_operations wireless_seq_ops = {
673 659
674static int wireless_seq_open(struct inode *inode, struct file *file) 660static int wireless_seq_open(struct inode *inode, struct file *file)
675{ 661{
676 struct seq_file *seq; 662 return seq_open_net(inode, file, &wireless_seq_ops,
677 int res; 663 sizeof(struct seq_net_private));
678 res = seq_open(file, &wireless_seq_ops);
679 if (!res) {
680 seq = file->private_data;
681 seq->private = get_proc_net(inode);
682 if (!seq->private) {
683 seq_release(inode, file);
684 res = -ENXIO;
685 }
686 }
687 return res;
688}
689
690static int wireless_seq_release(struct inode *inode, struct file *file)
691{
692 struct seq_file *seq = file->private_data;
693 struct net *net = seq->private;
694 put_net(net);
695 return seq_release(inode, file);
696} 664}
697 665
698static const struct file_operations wireless_seq_fops = { 666static const struct file_operations wireless_seq_fops = {
@@ -700,7 +668,7 @@ static const struct file_operations wireless_seq_fops = {
700 .open = wireless_seq_open, 668 .open = wireless_seq_open,
701 .read = seq_read, 669 .read = seq_read,
702 .llseek = seq_lseek, 670 .llseek = seq_lseek,
703 .release = wireless_seq_release, 671 .release = seq_release_net,
704}; 672};
705 673
706int wext_proc_init(struct net *net) 674int wext_proc_init(struct net *net)
@@ -1094,7 +1062,7 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
1094 rtnl_lock(); 1062 rtnl_lock();
1095 ret = wireless_process_ioctl(net, ifr, cmd); 1063 ret = wireless_process_ioctl(net, ifr, cmd);
1096 rtnl_unlock(); 1064 rtnl_unlock();
1097 if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) 1065 if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct iwreq)))
1098 return -EFAULT; 1066 return -EFAULT;
1099 return ret; 1067 return ret;
1100} 1068}
@@ -1137,7 +1105,7 @@ static void wireless_nlevent_process(unsigned long data)
1137 struct sk_buff *skb; 1105 struct sk_buff *skb;
1138 1106
1139 while ((skb = skb_dequeue(&wireless_nlevent_queue))) 1107 while ((skb = skb_dequeue(&wireless_nlevent_queue)))
1140 rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 1108 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
1141} 1109}
1142 1110
1143static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); 1111static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
@@ -1189,6 +1157,9 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
1189 struct sk_buff *skb; 1157 struct sk_buff *skb;
1190 int err; 1158 int err;
1191 1159
1160 if (dev->nd_net != &init_net)
1161 return;
1162
1192 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1163 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1193 if (!skb) 1164 if (!skb)
1194 return; 1165 return;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index fc416f9606a9..339ca4a8e89e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -83,9 +83,9 @@ struct compat_x25_subscrip_struct {
83int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, 83int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr,
84 struct x25_address *calling_addr) 84 struct x25_address *calling_addr)
85{ 85{
86 int called_len, calling_len; 86 unsigned int called_len, calling_len;
87 char *called, *calling; 87 char *called, *calling;
88 int i; 88 unsigned int i;
89 89
90 called_len = (*p >> 0) & 0x0F; 90 called_len = (*p >> 0) & 0x0F;
91 calling_len = (*p >> 4) & 0x0F; 91 calling_len = (*p >> 4) & 0x0F;
@@ -472,7 +472,7 @@ static struct proto x25_proto = {
472static struct sock *x25_alloc_socket(struct net *net) 472static struct sock *x25_alloc_socket(struct net *net)
473{ 473{
474 struct x25_sock *x25; 474 struct x25_sock *x25;
475 struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); 475 struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto);
476 476
477 if (!sk) 477 if (!sk)
478 goto out; 478 goto out;
@@ -1652,7 +1652,7 @@ static int __init x25_init(void)
1652 1652
1653 register_netdevice_notifier(&x25_dev_notifier); 1653 register_netdevice_notifier(&x25_dev_notifier);
1654 1654
1655 printk(KERN_INFO "X.25 for Linux. Version 0.2 for Linux 2.1.15\n"); 1655 printk(KERN_INFO "X.25 for Linux Version 0.2\n");
1656 1656
1657#ifdef CONFIG_SYSCTL 1657#ifdef CONFIG_SYSCTL
1658 x25_register_sysctl(); 1658 x25_register_sysctl();
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a59b77f18234..6ebda25c24e9 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -84,29 +84,15 @@ static struct ctl_table x25_table[] = {
84 { 0, }, 84 { 0, },
85}; 85};
86 86
87static struct ctl_table x25_dir_table[] = { 87static struct ctl_path x25_path[] = {
88 { 88 { .procname = "net", .ctl_name = CTL_NET, },
89 .ctl_name = NET_X25, 89 { .procname = "x25", .ctl_name = NET_X25, },
90 .procname = "x25", 90 { }
91 .mode = 0555,
92 .child = x25_table,
93 },
94 { 0, },
95};
96
97static struct ctl_table x25_root_table[] = {
98 {
99 .ctl_name = CTL_NET,
100 .procname = "net",
101 .mode = 0555,
102 .child = x25_dir_table,
103 },
104 { 0, },
105}; 91};
106 92
107void __init x25_register_sysctl(void) 93void __init x25_register_sysctl(void)
108{ 94{
109 x25_table_header = register_sysctl_table(x25_root_table); 95 x25_table_header = register_sysctl_paths(x25_path, x25_table);
110} 96}
111 97
112void x25_unregister_sysctl(void) 98void x25_unregister_sysctl(void)
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index dec404afa113..a21f6646eb3a 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -205,9 +205,7 @@ int x25_create_facilities(unsigned char *buffer,
205 } 205 }
206 206
207 if (dte_facs->calling_len && (facil_mask & X25_MASK_CALLING_AE)) { 207 if (dte_facs->calling_len && (facil_mask & X25_MASK_CALLING_AE)) {
208 unsigned bytecount = (dte_facs->calling_len % 2) ? 208 unsigned bytecount = (dte_facs->calling_len + 1) >> 1;
209 dte_facs->calling_len / 2 + 1 :
210 dte_facs->calling_len / 2;
211 *p++ = X25_FAC_CALLING_AE; 209 *p++ = X25_FAC_CALLING_AE;
212 *p++ = 1 + bytecount; 210 *p++ = 1 + bytecount;
213 *p++ = dte_facs->calling_len; 211 *p++ = dte_facs->calling_len;
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index 8738ec7ce693..056a55f3a871 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -12,7 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <net/x25.h> 13#include <net/x25.h>
14 14
15struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list); 15LIST_HEAD(x25_forward_list);
16DEFINE_RWLOCK(x25_forward_list_lock); 16DEFINE_RWLOCK(x25_forward_list_lock);
17 17
18int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, 18int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
@@ -118,13 +118,14 @@ int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) {
118 goto out; 118 goto out;
119 119
120 if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){ 120 if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){
121 goto out; 121 goto output;
122 122
123 } 123 }
124 x25_transmit_link(skbn, nb); 124 x25_transmit_link(skbn, nb);
125 125
126 x25_neigh_put(nb);
127 rc = 1; 126 rc = 1;
127output:
128 x25_neigh_put(nb);
128out: 129out:
129 return rc; 130 return rc;
130} 131}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 1c88762c2794..7d7c3abf38b5 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -247,7 +247,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
247 break; 247 break;
248 } 248 }
249 if (atomic_read(&sk->sk_rmem_alloc) > 249 if (atomic_read(&sk->sk_rmem_alloc) >
250 (sk->sk_rcvbuf / 2)) 250 (sk->sk_rcvbuf >> 1))
251 x25->condition |= X25_COND_OWN_RX_BUSY; 251 x25->condition |= X25_COND_OWN_RX_BUSY;
252 } 252 }
253 /* 253 /*
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 741ce95d4ad1..e4e1b6e49538 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <net/x25.h> 31#include <net/x25.h>
32 32
33static struct list_head x25_neigh_list = LIST_HEAD_INIT(x25_neigh_list); 33static LIST_HEAD(x25_neigh_list);
34static DEFINE_RWLOCK(x25_neigh_list_lock); 34static DEFINE_RWLOCK(x25_neigh_list_lock);
35 35
36static void x25_t20timer_expiry(unsigned long); 36static void x25_t20timer_expiry(unsigned long);
@@ -247,10 +247,7 @@ void x25_link_device_up(struct net_device *dev)
247 return; 247 return;
248 248
249 skb_queue_head_init(&nb->queue); 249 skb_queue_head_init(&nb->queue);
250 250 setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb);
251 init_timer(&nb->t20timer);
252 nb->t20timer.data = (unsigned long)nb;
253 nb->t20timer.function = &x25_t20timer_expiry;
254 251
255 dev_hold(dev); 252 dev_hold(dev);
256 nb->dev = dev; 253 nb->dev = dev;
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 7d55e50c936f..3f52b09bed03 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -41,6 +41,7 @@ found:
41} 41}
42 42
43static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos) 43static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos)
44 __acquires(x25_route_list_lock)
44{ 45{
45 loff_t l = *pos; 46 loff_t l = *pos;
46 47
@@ -70,6 +71,7 @@ out:
70} 71}
71 72
72static void x25_seq_route_stop(struct seq_file *seq, void *v) 73static void x25_seq_route_stop(struct seq_file *seq, void *v)
74 __releases(x25_route_list_lock)
73{ 75{
74 read_unlock_bh(&x25_route_list_lock); 76 read_unlock_bh(&x25_route_list_lock);
75} 77}
@@ -105,6 +107,7 @@ found:
105} 107}
106 108
107static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos) 109static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos)
110 __acquires(x25_list_lock)
108{ 111{
109 loff_t l = *pos; 112 loff_t l = *pos;
110 113
@@ -127,6 +130,7 @@ out:
127} 130}
128 131
129static void x25_seq_socket_stop(struct seq_file *seq, void *v) 132static void x25_seq_socket_stop(struct seq_file *seq, void *v)
133 __releases(x25_list_lock)
130{ 134{
131 read_unlock_bh(&x25_list_lock); 135 read_unlock_bh(&x25_list_lock);
132} 136}
@@ -183,6 +187,7 @@ found:
183} 187}
184 188
185static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos) 189static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos)
190 __acquires(x25_forward_list_lock)
186{ 191{
187 loff_t l = *pos; 192 loff_t l = *pos;
188 193
@@ -213,6 +218,7 @@ out:
213} 218}
214 219
215static void x25_seq_forward_stop(struct seq_file *seq, void *v) 220static void x25_seq_forward_stop(struct seq_file *seq, void *v)
221 __releases(x25_forward_list_lock)
216{ 222{
217 read_unlock_bh(&x25_forward_list_lock); 223 read_unlock_bh(&x25_forward_list_lock);
218} 224}
@@ -287,7 +293,7 @@ static const struct file_operations x25_seq_route_fops = {
287 .release = seq_release, 293 .release = seq_release,
288}; 294};
289 295
290static struct file_operations x25_seq_forward_fops = { 296static const struct file_operations x25_seq_forward_fops = {
291 .owner = THIS_MODULE, 297 .owner = THIS_MODULE,
292 .open = x25_seq_forward_open, 298 .open = x25_seq_forward_open,
293 .read = seq_read, 299 .read = seq_read,
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 86b5b4da097c..2c999ccf504a 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -21,7 +21,7 @@
21#include <linux/init.h> 21#include <linux/init.h>
22#include <net/x25.h> 22#include <net/x25.h>
23 23
24struct list_head x25_route_list = LIST_HEAD_INIT(x25_route_list); 24LIST_HEAD(x25_route_list);
25DEFINE_RWLOCK(x25_route_list_lock); 25DEFINE_RWLOCK(x25_route_list_lock);
26 26
27/* 27/*
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 8d6220aa5d0f..511a5986af3e 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -359,7 +359,7 @@ void x25_check_rbuf(struct sock *sk)
359{ 359{
360 struct x25_sock *x25 = x25_sk(sk); 360 struct x25_sock *x25 = x25_sk(sk);
361 361
362 if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && 362 if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf >> 1) &&
363 (x25->condition & X25_COND_OWN_RX_BUSY)) { 363 (x25->condition & X25_COND_OWN_RX_BUSY)) {
364 x25->condition &= ~X25_COND_OWN_RX_BUSY; 364 x25->condition &= ~X25_COND_OWN_RX_BUSY;
365 x25->condition &= ~X25_COND_ACK_PENDING; 365 x25->condition &= ~X25_COND_ACK_PENDING;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 2af190dc5b01..d3e3e54db936 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -33,9 +33,7 @@ void x25_init_timers(struct sock *sk)
33{ 33{
34 struct x25_sock *x25 = x25_sk(sk); 34 struct x25_sock *x25 = x25_sk(sk);
35 35
36 init_timer(&x25->timer); 36 setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
37 x25->timer.data = (unsigned long)sk;
38 x25->timer.function = &x25_timer_expiry;
39 37
40 /* initialized by sock_init_data */ 38 /* initialized by sock_init_data */
41 sk->sk_timer.data = (unsigned long)sk; 39 sk->sk_timer.data = (unsigned long)sk;
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 577a4f821b98..8f9dbec319be 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -3,6 +3,7 @@
3# 3#
4config XFRM 4config XFRM
5 bool 5 bool
6 select CRYPTO
6 depends on NET 7 depends on NET
7 8
8config XFRM_USER 9config XFRM_USER
@@ -35,6 +36,16 @@ config XFRM_MIGRATE
35 36
36 If unsure, say N. 37 If unsure, say N.
37 38
39config XFRM_STATISTICS
40 bool "Transformation statistics (EXPERIMENTAL)"
41 depends on XFRM && PROC_FS && EXPERIMENTAL
42 ---help---
43 This statistics is not a SNMP/MIB specification but shows
44 statistics about transformation error (or almost error) factor
45 at packet processing for developer.
46
47 If unsure, say N.
48
38config NET_KEY 49config NET_KEY
39 tristate "PF_KEY sockets" 50 tristate "PF_KEY sockets"
40 select XFRM 51 select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 45744a3d3a51..332cfb0ff566 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,5 +4,6 @@
4 4
5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ 5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
6 xfrm_input.o xfrm_output.o xfrm_algo.o 6 xfrm_input.o xfrm_output.o xfrm_algo.o
7obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
7obj-$(CONFIG_XFRM_USER) += xfrm_user.o 8obj-$(CONFIG_XFRM_USER) += xfrm_user.o
8 9
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 0426388d351d..6cc15250de69 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -21,7 +21,6 @@
21#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) 21#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
22#include <net/esp.h> 22#include <net/esp.h>
23#endif 23#endif
24#include <asm/scatterlist.h>
25 24
26/* 25/*
27 * Algorithms supported by IPsec. These entries contain properties which 26 * Algorithms supported by IPsec. These entries contain properties which
@@ -29,6 +28,105 @@
29 * that instantiated crypto transforms have correct parameters for IPsec 28 * that instantiated crypto transforms have correct parameters for IPsec
30 * purposes. 29 * purposes.
31 */ 30 */
31static struct xfrm_algo_desc aead_list[] = {
32{
33 .name = "rfc4106(gcm(aes))",
34
35 .uinfo = {
36 .aead = {
37 .icv_truncbits = 64,
38 }
39 },
40
41 .desc = {
42 .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8,
43 .sadb_alg_ivlen = 8,
44 .sadb_alg_minbits = 128,
45 .sadb_alg_maxbits = 256
46 }
47},
48{
49 .name = "rfc4106(gcm(aes))",
50
51 .uinfo = {
52 .aead = {
53 .icv_truncbits = 96,
54 }
55 },
56
57 .desc = {
58 .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12,
59 .sadb_alg_ivlen = 8,
60 .sadb_alg_minbits = 128,
61 .sadb_alg_maxbits = 256
62 }
63},
64{
65 .name = "rfc4106(gcm(aes))",
66
67 .uinfo = {
68 .aead = {
69 .icv_truncbits = 128,
70 }
71 },
72
73 .desc = {
74 .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16,
75 .sadb_alg_ivlen = 8,
76 .sadb_alg_minbits = 128,
77 .sadb_alg_maxbits = 256
78 }
79},
80{
81 .name = "rfc4309(ccm(aes))",
82
83 .uinfo = {
84 .aead = {
85 .icv_truncbits = 64,
86 }
87 },
88
89 .desc = {
90 .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8,
91 .sadb_alg_ivlen = 8,
92 .sadb_alg_minbits = 128,
93 .sadb_alg_maxbits = 256
94 }
95},
96{
97 .name = "rfc4309(ccm(aes))",
98
99 .uinfo = {
100 .aead = {
101 .icv_truncbits = 96,
102 }
103 },
104
105 .desc = {
106 .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12,
107 .sadb_alg_ivlen = 8,
108 .sadb_alg_minbits = 128,
109 .sadb_alg_maxbits = 256
110 }
111},
112{
113 .name = "rfc4309(ccm(aes))",
114
115 .uinfo = {
116 .aead = {
117 .icv_truncbits = 128,
118 }
119 },
120
121 .desc = {
122 .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16,
123 .sadb_alg_ivlen = 8,
124 .sadb_alg_minbits = 128,
125 .sadb_alg_maxbits = 256
126 }
127},
128};
129
32static struct xfrm_algo_desc aalg_list[] = { 130static struct xfrm_algo_desc aalg_list[] = {
33{ 131{
34 .name = "hmac(digest_null)", 132 .name = "hmac(digest_null)",
@@ -333,6 +431,11 @@ static struct xfrm_algo_desc calg_list[] = {
333}, 431},
334}; 432};
335 433
434static inline int aead_entries(void)
435{
436 return ARRAY_SIZE(aead_list);
437}
438
336static inline int aalg_entries(void) 439static inline int aalg_entries(void)
337{ 440{
338 return ARRAY_SIZE(aalg_list); 441 return ARRAY_SIZE(aalg_list);
@@ -355,25 +458,32 @@ struct xfrm_algo_list {
355 u32 mask; 458 u32 mask;
356}; 459};
357 460
461static const struct xfrm_algo_list xfrm_aead_list = {
462 .algs = aead_list,
463 .entries = ARRAY_SIZE(aead_list),
464 .type = CRYPTO_ALG_TYPE_AEAD,
465 .mask = CRYPTO_ALG_TYPE_MASK,
466};
467
358static const struct xfrm_algo_list xfrm_aalg_list = { 468static const struct xfrm_algo_list xfrm_aalg_list = {
359 .algs = aalg_list, 469 .algs = aalg_list,
360 .entries = ARRAY_SIZE(aalg_list), 470 .entries = ARRAY_SIZE(aalg_list),
361 .type = CRYPTO_ALG_TYPE_HASH, 471 .type = CRYPTO_ALG_TYPE_HASH,
362 .mask = CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC, 472 .mask = CRYPTO_ALG_TYPE_HASH_MASK,
363}; 473};
364 474
365static const struct xfrm_algo_list xfrm_ealg_list = { 475static const struct xfrm_algo_list xfrm_ealg_list = {
366 .algs = ealg_list, 476 .algs = ealg_list,
367 .entries = ARRAY_SIZE(ealg_list), 477 .entries = ARRAY_SIZE(ealg_list),
368 .type = CRYPTO_ALG_TYPE_BLKCIPHER, 478 .type = CRYPTO_ALG_TYPE_BLKCIPHER,
369 .mask = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC, 479 .mask = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
370}; 480};
371 481
372static const struct xfrm_algo_list xfrm_calg_list = { 482static const struct xfrm_algo_list xfrm_calg_list = {
373 .algs = calg_list, 483 .algs = calg_list,
374 .entries = ARRAY_SIZE(calg_list), 484 .entries = ARRAY_SIZE(calg_list),
375 .type = CRYPTO_ALG_TYPE_COMPRESS, 485 .type = CRYPTO_ALG_TYPE_COMPRESS,
376 .mask = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC, 486 .mask = CRYPTO_ALG_TYPE_MASK,
377}; 487};
378 488
379static struct xfrm_algo_desc *xfrm_find_algo( 489static struct xfrm_algo_desc *xfrm_find_algo(
@@ -462,6 +572,33 @@ struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
462} 572}
463EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); 573EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
464 574
575struct xfrm_aead_name {
576 const char *name;
577 int icvbits;
578};
579
580static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry,
581 const void *data)
582{
583 const struct xfrm_aead_name *aead = data;
584 const char *name = aead->name;
585
586 return aead->icvbits == entry->uinfo.aead.icv_truncbits && name &&
587 !strcmp(name, entry->name);
588}
589
590struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe)
591{
592 struct xfrm_aead_name data = {
593 .name = name,
594 .icvbits = icv_len,
595 };
596
597 return xfrm_find_algo(&xfrm_aead_list, xfrm_aead_name_match, &data,
598 probe);
599}
600EXPORT_SYMBOL_GPL(xfrm_aead_get_byname);
601
465struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) 602struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
466{ 603{
467 if (idx >= aalg_entries()) 604 if (idx >= aalg_entries())
@@ -487,7 +624,6 @@ EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
487 */ 624 */
488void xfrm_probe_algs(void) 625void xfrm_probe_algs(void)
489{ 626{
490#ifdef CONFIG_CRYPTO
491 int i, status; 627 int i, status;
492 628
493 BUG_ON(in_softirq()); 629 BUG_ON(in_softirq());
@@ -512,7 +648,6 @@ void xfrm_probe_algs(void)
512 if (calg_list[i].available != status) 648 if (calg_list[i].available != status)
513 calg_list[i].available = status; 649 calg_list[i].available = status;
514 } 650 }
515#endif
516} 651}
517EXPORT_SYMBOL_GPL(xfrm_probe_algs); 652EXPORT_SYMBOL_GPL(xfrm_probe_algs);
518 653
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 55ab5792af56..a2023ec52329 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -17,17 +17,14 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
17 struct hlist_head *n; 17 struct hlist_head *n;
18 18
19 if (sz <= PAGE_SIZE) 19 if (sz <= PAGE_SIZE)
20 n = kmalloc(sz, GFP_KERNEL); 20 n = kzalloc(sz, GFP_KERNEL);
21 else if (hashdist) 21 else if (hashdist)
22 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); 22 n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
23 else 23 else
24 n = (struct hlist_head *) 24 n = (struct hlist_head *)
25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN, 25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
26 get_order(sz)); 26 get_order(sz));
27 27
28 if (n)
29 memset(n, 0, sz);
30
31 return n; 28 return n;
32} 29}
33 30
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index cb97fda1b6df..4d6ebc633a94 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -9,6 +9,8 @@
9 9
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/netdevice.h>
13#include <net/dst.h>
12#include <net/ip.h> 14#include <net/ip.h>
13#include <net/xfrm.h> 15#include <net/xfrm.h>
14 16
@@ -79,7 +81,180 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
79 *seq = *(__be32*)(skb_transport_header(skb) + offset_seq); 81 *seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
80 return 0; 82 return 0;
81} 83}
82EXPORT_SYMBOL(xfrm_parse_spi); 84
85int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
86{
87 int err;
88
89 err = x->outer_mode->afinfo->extract_input(x, skb);
90 if (err)
91 return err;
92
93 skb->protocol = x->inner_mode->afinfo->eth_proto;
94 return x->inner_mode->input2(x, skb);
95}
96EXPORT_SYMBOL(xfrm_prepare_input);
97
98int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
99{
100 int err;
101 __be32 seq;
102 struct xfrm_state *x;
103 xfrm_address_t *daddr;
104 unsigned int family;
105 int decaps = 0;
106 int async = 0;
107
108 /* A negative encap_type indicates async resumption. */
109 if (encap_type < 0) {
110 async = 1;
111 x = xfrm_input_state(skb);
112 seq = XFRM_SKB_CB(skb)->seq;
113 goto resume;
114 }
115
116 /* Allocate new secpath or COW existing one. */
117 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
118 struct sec_path *sp;
119
120 sp = secpath_dup(skb->sp);
121 if (!sp) {
122 XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
123 goto drop;
124 }
125 if (skb->sp)
126 secpath_put(skb->sp);
127 skb->sp = sp;
128 }
129
130 daddr = (xfrm_address_t *)(skb_network_header(skb) +
131 XFRM_SPI_SKB_CB(skb)->daddroff);
132 family = XFRM_SPI_SKB_CB(skb)->family;
133
134 seq = 0;
135 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
136 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
137 goto drop;
138 }
139
140 do {
141 if (skb->sp->len == XFRM_MAX_DEPTH) {
142 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
143 goto drop;
144 }
145
146 x = xfrm_state_lookup(daddr, spi, nexthdr, family);
147 if (x == NULL) {
148 XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
149 xfrm_audit_state_notfound(skb, family, spi, seq);
150 goto drop;
151 }
152
153 skb->sp->xvec[skb->sp->len++] = x;
154
155 spin_lock(&x->lock);
156 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
157 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
158 goto drop_unlock;
159 }
160
161 if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
162 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
163 goto drop_unlock;
164 }
165
166 if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) {
167 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR);
168 goto drop_unlock;
169 }
170
171 if (xfrm_state_check_expire(x)) {
172 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED);
173 goto drop_unlock;
174 }
175
176 spin_unlock(&x->lock);
177
178 XFRM_SKB_CB(skb)->seq = seq;
179
180 nexthdr = x->type->input(x, skb);
181
182 if (nexthdr == -EINPROGRESS)
183 return 0;
184
185resume:
186 spin_lock(&x->lock);
187 if (nexthdr <= 0) {
188 if (nexthdr == -EBADMSG) {
189 xfrm_audit_state_icvfail(x, skb,
190 x->type->proto);
191 x->stats.integrity_failed++;
192 }
193 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR);
194 goto drop_unlock;
195 }
196
197 /* only the first xfrm gets the encap type */
198 encap_type = 0;
199
200 if (x->props.replay_window)
201 xfrm_replay_advance(x, seq);
202
203 x->curlft.bytes += skb->len;
204 x->curlft.packets++;
205
206 spin_unlock(&x->lock);
207
208 XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
209
210 if (x->inner_mode->input(x, skb)) {
211 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR);
212 goto drop;
213 }
214
215 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
216 decaps = 1;
217 break;
218 }
219
220 /*
221 * We need the inner address. However, we only get here for
222 * transport mode so the outer address is identical.
223 */
224 daddr = &x->id.daddr;
225 family = x->outer_mode->afinfo->family;
226
227 err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
228 if (err < 0) {
229 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
230 goto drop;
231 }
232 } while (!err);
233
234 nf_reset(skb);
235
236 if (decaps) {
237 dst_release(skb->dst);
238 skb->dst = NULL;
239 netif_rx(skb);
240 return 0;
241 } else {
242 return x->inner_mode->afinfo->transport_finish(skb, async);
243 }
244
245drop_unlock:
246 spin_unlock(&x->lock);
247drop:
248 kfree_skb(skb);
249 return 0;
250}
251EXPORT_SYMBOL(xfrm_input);
252
253int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
254{
255 return xfrm_input(skb, nexthdr, 0, -1);
256}
257EXPORT_SYMBOL(xfrm_input_resume);
83 258
84void __init xfrm_input_init(void) 259void __init xfrm_input_init(void)
85{ 260{
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index f4bfd6c45651..fc690368325f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -12,14 +12,18 @@
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/netfilter.h>
15#include <linux/skbuff.h> 16#include <linux/skbuff.h>
16#include <linux/spinlock.h> 17#include <linux/spinlock.h>
17#include <net/dst.h> 18#include <net/dst.h>
18#include <net/xfrm.h> 19#include <net/xfrm.h>
19 20
21static int xfrm_output2(struct sk_buff *skb);
22
20static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) 23static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
21{ 24{
22 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) 25 struct dst_entry *dst = skb->dst;
26 int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
23 - skb_headroom(skb); 27 - skb_headroom(skb);
24 28
25 if (nhead > 0) 29 if (nhead > 0)
@@ -29,54 +33,64 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
29 return 0; 33 return 0;
30} 34}
31 35
32static int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) 36static int xfrm_output_one(struct sk_buff *skb, int err)
33{
34 int err = xfrm_state_check_expire(x);
35 if (err < 0)
36 goto err;
37 err = xfrm_state_check_space(x, skb);
38err:
39 return err;
40}
41
42int xfrm_output(struct sk_buff *skb)
43{ 37{
44 struct dst_entry *dst = skb->dst; 38 struct dst_entry *dst = skb->dst;
45 struct xfrm_state *x = dst->xfrm; 39 struct xfrm_state *x = dst->xfrm;
46 int err;
47 40
48 if (skb->ip_summed == CHECKSUM_PARTIAL) { 41 if (err <= 0)
49 err = skb_checksum_help(skb); 42 goto resume;
50 if (err)
51 goto error_nolock;
52 }
53 43
54 do { 44 do {
45 err = xfrm_state_check_space(x, skb);
46 if (err) {
47 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
48 goto error_nolock;
49 }
50
51 err = x->outer_mode->output(x, skb);
52 if (err) {
53 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR);
54 goto error_nolock;
55 }
56
55 spin_lock_bh(&x->lock); 57 spin_lock_bh(&x->lock);
56 err = xfrm_state_check(x, skb); 58 err = xfrm_state_check_expire(x);
57 if (err) 59 if (err) {
60 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED);
58 goto error; 61 goto error;
62 }
59 63
60 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { 64 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
61 XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; 65 XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
66 if (unlikely(x->replay.oseq == 0)) {
67 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR);
68 x->replay.oseq--;
69 xfrm_audit_state_replay_overflow(x, skb);
70 err = -EOVERFLOW;
71 goto error;
72 }
62 if (xfrm_aevent_is_on()) 73 if (xfrm_aevent_is_on())
63 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); 74 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
64 } 75 }
65 76
66 err = x->outer_mode->output(x, skb);
67 if (err)
68 goto error;
69
70 x->curlft.bytes += skb->len; 77 x->curlft.bytes += skb->len;
71 x->curlft.packets++; 78 x->curlft.packets++;
72 79
73 spin_unlock_bh(&x->lock); 80 spin_unlock_bh(&x->lock);
74 81
75 err = x->type->output(x, skb); 82 err = x->type->output(x, skb);
76 if (err) 83 if (err == -EINPROGRESS)
84 goto out_exit;
85
86resume:
87 if (err) {
88 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR);
77 goto error_nolock; 89 goto error_nolock;
90 }
78 91
79 if (!(skb->dst = dst_pop(dst))) { 92 if (!(skb->dst = dst_pop(dst))) {
93 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
80 err = -EHOSTUNREACH; 94 err = -EHOSTUNREACH;
81 goto error_nolock; 95 goto error_nolock;
82 } 96 }
@@ -86,10 +100,97 @@ int xfrm_output(struct sk_buff *skb)
86 100
87 err = 0; 101 err = 0;
88 102
89error_nolock: 103out_exit:
90 return err; 104 return err;
91error: 105error:
92 spin_unlock_bh(&x->lock); 106 spin_unlock_bh(&x->lock);
93 goto error_nolock; 107error_nolock:
108 kfree_skb(skb);
109 goto out_exit;
110}
111
112int xfrm_output_resume(struct sk_buff *skb, int err)
113{
114 while (likely((err = xfrm_output_one(skb, err)) == 0)) {
115 struct xfrm_state *x;
116
117 nf_reset(skb);
118
119 err = skb->dst->ops->local_out(skb);
120 if (unlikely(err != 1))
121 goto out;
122
123 x = skb->dst->xfrm;
124 if (!x)
125 return dst_output(skb);
126
127 err = nf_hook(x->inner_mode->afinfo->family,
128 NF_INET_POST_ROUTING, skb,
129 NULL, skb->dst->dev, xfrm_output2);
130 if (unlikely(err != 1))
131 goto out;
132 }
133
134 if (err == -EINPROGRESS)
135 err = 0;
136
137out:
138 return err;
139}
140EXPORT_SYMBOL_GPL(xfrm_output_resume);
141
142static int xfrm_output2(struct sk_buff *skb)
143{
144 return xfrm_output_resume(skb, 1);
145}
146
147static int xfrm_output_gso(struct sk_buff *skb)
148{
149 struct sk_buff *segs;
150
151 segs = skb_gso_segment(skb, 0);
152 kfree_skb(skb);
153 if (unlikely(IS_ERR(segs)))
154 return PTR_ERR(segs);
155
156 do {
157 struct sk_buff *nskb = segs->next;
158 int err;
159
160 segs->next = NULL;
161 err = xfrm_output2(segs);
162
163 if (unlikely(err)) {
164 while ((segs = nskb)) {
165 nskb = segs->next;
166 segs->next = NULL;
167 kfree_skb(segs);
168 }
169 return err;
170 }
171
172 segs = nskb;
173 } while (segs);
174
175 return 0;
176}
177
178int xfrm_output(struct sk_buff *skb)
179{
180 int err;
181
182 if (skb_is_gso(skb))
183 return xfrm_output_gso(skb);
184
185 if (skb->ip_summed == CHECKSUM_PARTIAL) {
186 err = skb_checksum_help(skb);
187 if (err) {
188 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
189 kfree_skb(skb);
190 return err;
191 }
192 }
193
194 return xfrm_output2(skb);
94} 195}
95EXPORT_SYMBOL_GPL(xfrm_output); 196EXPORT_SYMBOL_GPL(xfrm_output);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b702bd8a3893..47219f98053f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -13,6 +13,7 @@
13 * 13 *
14 */ 14 */
15 15
16#include <linux/err.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/kmod.h> 18#include <linux/kmod.h>
18#include <linux/list.h> 19#include <linux/list.h>
@@ -23,13 +24,23 @@
23#include <linux/netfilter.h> 24#include <linux/netfilter.h>
24#include <linux/module.h> 25#include <linux/module.h>
25#include <linux/cache.h> 26#include <linux/cache.h>
27#include <linux/audit.h>
28#include <net/dst.h>
26#include <net/xfrm.h> 29#include <net/xfrm.h>
27#include <net/ip.h> 30#include <net/ip.h>
31#ifdef CONFIG_XFRM_STATISTICS
32#include <net/snmp.h>
33#endif
28 34
29#include "xfrm_hash.h" 35#include "xfrm_hash.h"
30 36
31int sysctl_xfrm_larval_drop __read_mostly; 37int sysctl_xfrm_larval_drop __read_mostly;
32 38
39#ifdef CONFIG_XFRM_STATISTICS
40DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41EXPORT_SYMBOL(xfrm_statistics);
42#endif
43
33DEFINE_MUTEX(xfrm_cfg_mutex); 44DEFINE_MUTEX(xfrm_cfg_mutex);
34EXPORT_SYMBOL(xfrm_cfg_mutex); 45EXPORT_SYMBOL(xfrm_cfg_mutex);
35 46
@@ -49,6 +60,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49 60
50static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); 61static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 62static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
63static void xfrm_init_pmtu(struct dst_entry *dst);
52 64
53static inline int 65static inline int
54__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) 66__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
@@ -84,23 +96,27 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
84 return 0; 96 return 0;
85} 97}
86 98
87int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 99static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
88 unsigned short family) 100 int family)
89{ 101{
90 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 102 xfrm_address_t *saddr = &x->props.saddr;
91 int err = 0; 103 xfrm_address_t *daddr = &x->id.daddr;
104 struct xfrm_policy_afinfo *afinfo;
105 struct dst_entry *dst;
106
107 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
108 saddr = x->coaddr;
109 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
110 daddr = x->coaddr;
92 111
112 afinfo = xfrm_policy_get_afinfo(family);
93 if (unlikely(afinfo == NULL)) 113 if (unlikely(afinfo == NULL))
94 return -EAFNOSUPPORT; 114 return ERR_PTR(-EAFNOSUPPORT);
95 115
96 if (likely(afinfo->dst_lookup != NULL)) 116 dst = afinfo->dst_lookup(tos, saddr, daddr);
97 err = afinfo->dst_lookup(dst, fl);
98 else
99 err = -EINVAL;
100 xfrm_policy_put_afinfo(afinfo); 117 xfrm_policy_put_afinfo(afinfo);
101 return err; 118 return dst;
102} 119}
103EXPORT_SYMBOL(xfrm_dst_lookup);
104 120
105static inline unsigned long make_jiffies(long secs) 121static inline unsigned long make_jiffies(long secs)
106{ 122{
@@ -196,9 +212,8 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
196 INIT_HLIST_NODE(&policy->byidx); 212 INIT_HLIST_NODE(&policy->byidx);
197 rwlock_init(&policy->lock); 213 rwlock_init(&policy->lock);
198 atomic_set(&policy->refcnt, 1); 214 atomic_set(&policy->refcnt, 1);
199 init_timer(&policy->timer); 215 setup_timer(&policy->timer, xfrm_policy_timer,
200 policy->timer.data = (unsigned long)policy; 216 (unsigned long)policy);
201 policy->timer.function = xfrm_policy_timer;
202 } 217 }
203 return policy; 218 return policy;
204} 219}
@@ -206,7 +221,7 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
206 221
207/* Destroy xfrm_policy: descendant resources must be released to this moment. */ 222/* Destroy xfrm_policy: descendant resources must be released to this moment. */
208 223
209void __xfrm_policy_destroy(struct xfrm_policy *policy) 224void xfrm_policy_destroy(struct xfrm_policy *policy)
210{ 225{
211 BUG_ON(!policy->dead); 226 BUG_ON(!policy->dead);
212 227
@@ -218,7 +233,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy)
218 security_xfrm_policy_free(policy); 233 security_xfrm_policy_free(policy);
219 kfree(policy); 234 kfree(policy);
220} 235}
221EXPORT_SYMBOL(__xfrm_policy_destroy); 236EXPORT_SYMBOL(xfrm_policy_destroy);
222 237
223static void xfrm_policy_gc_kill(struct xfrm_policy *policy) 238static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
224{ 239{
@@ -1230,24 +1245,185 @@ xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short fa
1230 return x; 1245 return x;
1231} 1246}
1232 1247
1233/* Allocate chain of dst_entry's, attach known xfrm's, calculate 1248static inline int xfrm_get_tos(struct flowi *fl, int family)
1234 * all the metrics... Shortly, bundle a bundle. 1249{
1235 */ 1250 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1251 int tos;
1236 1252
1237static int 1253 if (!afinfo)
1238xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, 1254 return -EINVAL;
1239 struct flowi *fl, struct dst_entry **dst_p, 1255
1240 unsigned short family) 1256 tos = afinfo->get_tos(fl);
1257
1258 xfrm_policy_put_afinfo(afinfo);
1259
1260 return tos;
1261}
1262
1263static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1241{ 1264{
1242 int err;
1243 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1265 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1244 if (unlikely(afinfo == NULL)) 1266 struct xfrm_dst *xdst;
1267
1268 if (!afinfo)
1269 return ERR_PTR(-EINVAL);
1270
1271 xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1272
1273 xfrm_policy_put_afinfo(afinfo);
1274
1275 return xdst;
1276}
1277
1278static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1279 int nfheader_len)
1280{
1281 struct xfrm_policy_afinfo *afinfo =
1282 xfrm_policy_get_afinfo(dst->ops->family);
1283 int err;
1284
1285 if (!afinfo)
1245 return -EINVAL; 1286 return -EINVAL;
1246 err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); 1287
1288 err = afinfo->init_path(path, dst, nfheader_len);
1289
1290 xfrm_policy_put_afinfo(afinfo);
1291
1292 return err;
1293}
1294
1295static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1296{
1297 struct xfrm_policy_afinfo *afinfo =
1298 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1299 int err;
1300
1301 if (!afinfo)
1302 return -EINVAL;
1303
1304 err = afinfo->fill_dst(xdst, dev);
1305
1247 xfrm_policy_put_afinfo(afinfo); 1306 xfrm_policy_put_afinfo(afinfo);
1307
1248 return err; 1308 return err;
1249} 1309}
1250 1310
1311/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1312 * all the metrics... Shortly, bundle a bundle.
1313 */
1314
1315static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1316 struct xfrm_state **xfrm, int nx,
1317 struct flowi *fl,
1318 struct dst_entry *dst)
1319{
1320 unsigned long now = jiffies;
1321 struct net_device *dev;
1322 struct dst_entry *dst_prev = NULL;
1323 struct dst_entry *dst0 = NULL;
1324 int i = 0;
1325 int err;
1326 int header_len = 0;
1327 int nfheader_len = 0;
1328 int trailer_len = 0;
1329 int tos;
1330 int family = policy->selector.family;
1331
1332 tos = xfrm_get_tos(fl, family);
1333 err = tos;
1334 if (tos < 0)
1335 goto put_states;
1336
1337 dst_hold(dst);
1338
1339 for (; i < nx; i++) {
1340 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1341 struct dst_entry *dst1 = &xdst->u.dst;
1342
1343 err = PTR_ERR(xdst);
1344 if (IS_ERR(xdst)) {
1345 dst_release(dst);
1346 goto put_states;
1347 }
1348
1349 if (!dst_prev)
1350 dst0 = dst1;
1351 else {
1352 dst_prev->child = dst_clone(dst1);
1353 dst1->flags |= DST_NOHASH;
1354 }
1355
1356 xdst->route = dst;
1357 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1358
1359 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1360 family = xfrm[i]->props.family;
1361 dst = xfrm_dst_lookup(xfrm[i], tos, family);
1362 err = PTR_ERR(dst);
1363 if (IS_ERR(dst))
1364 goto put_states;
1365 } else
1366 dst_hold(dst);
1367
1368 dst1->xfrm = xfrm[i];
1369 xdst->genid = xfrm[i]->genid;
1370
1371 dst1->obsolete = -1;
1372 dst1->flags |= DST_HOST;
1373 dst1->lastuse = now;
1374
1375 dst1->input = dst_discard;
1376 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1377
1378 dst1->next = dst_prev;
1379 dst_prev = dst1;
1380
1381 header_len += xfrm[i]->props.header_len;
1382 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1383 nfheader_len += xfrm[i]->props.header_len;
1384 trailer_len += xfrm[i]->props.trailer_len;
1385 }
1386
1387 dst_prev->child = dst;
1388 dst0->path = dst;
1389
1390 err = -ENODEV;
1391 dev = dst->dev;
1392 if (!dev)
1393 goto free_dst;
1394
1395 /* Copy neighbout for reachability confirmation */
1396 dst0->neighbour = neigh_clone(dst->neighbour);
1397
1398 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1399 xfrm_init_pmtu(dst_prev);
1400
1401 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1402 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1403
1404 err = xfrm_fill_dst(xdst, dev);
1405 if (err)
1406 goto free_dst;
1407
1408 dst_prev->header_len = header_len;
1409 dst_prev->trailer_len = trailer_len;
1410 header_len -= xdst->u.dst.xfrm->props.header_len;
1411 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1412 }
1413
1414out:
1415 return dst0;
1416
1417put_states:
1418 for (; i < nx; i++)
1419 xfrm_state_put(xfrm[i]);
1420free_dst:
1421 if (dst0)
1422 dst_free(dst0);
1423 dst0 = ERR_PTR(err);
1424 goto out;
1425}
1426
1251static int inline 1427static int inline
1252xfrm_dst_alloc_copy(void **target, void *src, int size) 1428xfrm_dst_alloc_copy(void **target, void *src, int size)
1253{ 1429{
@@ -1318,34 +1494,47 @@ restart:
1318 1494
1319 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { 1495 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1320 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 1496 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1321 if (IS_ERR(policy)) 1497 err = PTR_ERR(policy);
1322 return PTR_ERR(policy); 1498 if (IS_ERR(policy)) {
1499 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1500 goto dropdst;
1501 }
1323 } 1502 }
1324 1503
1325 if (!policy) { 1504 if (!policy) {
1326 /* To accelerate a bit... */ 1505 /* To accelerate a bit... */
1327 if ((dst_orig->flags & DST_NOXFRM) || 1506 if ((dst_orig->flags & DST_NOXFRM) ||
1328 !xfrm_policy_count[XFRM_POLICY_OUT]) 1507 !xfrm_policy_count[XFRM_POLICY_OUT])
1329 return 0; 1508 goto nopol;
1330 1509
1331 policy = flow_cache_lookup(fl, dst_orig->ops->family, 1510 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1332 dir, xfrm_policy_lookup); 1511 dir, xfrm_policy_lookup);
1333 if (IS_ERR(policy)) 1512 err = PTR_ERR(policy);
1334 return PTR_ERR(policy); 1513 if (IS_ERR(policy)) {
1514 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1515 goto dropdst;
1516 }
1335 } 1517 }
1336 1518
1337 if (!policy) 1519 if (!policy)
1338 return 0; 1520 goto nopol;
1339 1521
1340 family = dst_orig->ops->family; 1522 family = dst_orig->ops->family;
1341 policy->curlft.use_time = get_seconds();
1342 pols[0] = policy; 1523 pols[0] = policy;
1343 npols ++; 1524 npols ++;
1344 xfrm_nr += pols[0]->xfrm_nr; 1525 xfrm_nr += pols[0]->xfrm_nr;
1345 1526
1527 err = -ENOENT;
1528 if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1529 goto error;
1530
1531 policy->curlft.use_time = get_seconds();
1532
1346 switch (policy->action) { 1533 switch (policy->action) {
1534 default:
1347 case XFRM_POLICY_BLOCK: 1535 case XFRM_POLICY_BLOCK:
1348 /* Prohibit the flow */ 1536 /* Prohibit the flow */
1537 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1349 err = -EPERM; 1538 err = -EPERM;
1350 goto error; 1539 goto error;
1351 1540
@@ -1365,6 +1554,7 @@ restart:
1365 */ 1554 */
1366 dst = xfrm_find_bundle(fl, policy, family); 1555 dst = xfrm_find_bundle(fl, policy, family);
1367 if (IS_ERR(dst)) { 1556 if (IS_ERR(dst)) {
1557 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1368 err = PTR_ERR(dst); 1558 err = PTR_ERR(dst);
1369 goto error; 1559 goto error;
1370 } 1560 }
@@ -1379,10 +1569,12 @@ restart:
1379 XFRM_POLICY_OUT); 1569 XFRM_POLICY_OUT);
1380 if (pols[1]) { 1570 if (pols[1]) {
1381 if (IS_ERR(pols[1])) { 1571 if (IS_ERR(pols[1])) {
1572 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1382 err = PTR_ERR(pols[1]); 1573 err = PTR_ERR(pols[1]);
1383 goto error; 1574 goto error;
1384 } 1575 }
1385 if (pols[1]->action == XFRM_POLICY_BLOCK) { 1576 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1577 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1386 err = -EPERM; 1578 err = -EPERM;
1387 goto error; 1579 goto error;
1388 } 1580 }
@@ -1413,10 +1605,11 @@ restart:
1413 /* EREMOTE tells the caller to generate 1605 /* EREMOTE tells the caller to generate
1414 * a one-shot blackhole route. 1606 * a one-shot blackhole route.
1415 */ 1607 */
1608 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1416 xfrm_pol_put(policy); 1609 xfrm_pol_put(policy);
1417 return -EREMOTE; 1610 return -EREMOTE;
1418 } 1611 }
1419 if (err == -EAGAIN && flags) { 1612 if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1420 DECLARE_WAITQUEUE(wait, current); 1613 DECLARE_WAITQUEUE(wait, current);
1421 1614
1422 add_wait_queue(&km_waitq, &wait); 1615 add_wait_queue(&km_waitq, &wait);
@@ -1428,6 +1621,7 @@ restart:
1428 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); 1621 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1429 1622
1430 if (nx == -EAGAIN && signal_pending(current)) { 1623 if (nx == -EAGAIN && signal_pending(current)) {
1624 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1431 err = -ERESTART; 1625 err = -ERESTART;
1432 goto error; 1626 goto error;
1433 } 1627 }
@@ -1438,8 +1632,10 @@ restart:
1438 } 1632 }
1439 err = nx; 1633 err = nx;
1440 } 1634 }
1441 if (err < 0) 1635 if (err < 0) {
1636 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1442 goto error; 1637 goto error;
1638 }
1443 } 1639 }
1444 if (nx == 0) { 1640 if (nx == 0) {
1445 /* Flow passes not transformed. */ 1641 /* Flow passes not transformed. */
@@ -1447,13 +1643,10 @@ restart:
1447 return 0; 1643 return 0;
1448 } 1644 }
1449 1645
1450 dst = dst_orig; 1646 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1451 err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); 1647 err = PTR_ERR(dst);
1452 1648 if (IS_ERR(dst)) {
1453 if (unlikely(err)) { 1649 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1454 int i;
1455 for (i=0; i<nx; i++)
1456 xfrm_state_put(xfrm[i]);
1457 goto error; 1650 goto error;
1458 } 1651 }
1459 1652
@@ -1474,6 +1667,10 @@ restart:
1474 if (dst) 1667 if (dst)
1475 dst_free(dst); 1668 dst_free(dst);
1476 1669
1670 if (pol_dead)
1671 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1672 else
1673 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1477 err = -EHOSTUNREACH; 1674 err = -EHOSTUNREACH;
1478 goto error; 1675 goto error;
1479 } 1676 }
@@ -1486,6 +1683,7 @@ restart:
1486 write_unlock_bh(&policy->lock); 1683 write_unlock_bh(&policy->lock);
1487 if (dst) 1684 if (dst)
1488 dst_free(dst); 1685 dst_free(dst);
1686 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1489 goto error; 1687 goto error;
1490 } 1688 }
1491 1689
@@ -1500,10 +1698,17 @@ restart:
1500 return 0; 1698 return 0;
1501 1699
1502error: 1700error:
1503 dst_release(dst_orig);
1504 xfrm_pols_put(pols, npols); 1701 xfrm_pols_put(pols, npols);
1702dropdst:
1703 dst_release(dst_orig);
1505 *dst_p = NULL; 1704 *dst_p = NULL;
1506 return err; 1705 return err;
1706
1707nopol:
1708 err = -ENOENT;
1709 if (flags & XFRM_LOOKUP_ICMP)
1710 goto dropdst;
1711 return 0;
1507} 1712}
1508EXPORT_SYMBOL(__xfrm_lookup); 1713EXPORT_SYMBOL(__xfrm_lookup);
1509 1714
@@ -1587,8 +1792,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1587 return start; 1792 return start;
1588} 1793}
1589 1794
1590int 1795int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1591xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) 1796 unsigned int family, int reverse)
1592{ 1797{
1593 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1798 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1594 int err; 1799 int err;
@@ -1596,12 +1801,12 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
1596 if (unlikely(afinfo == NULL)) 1801 if (unlikely(afinfo == NULL))
1597 return -EAFNOSUPPORT; 1802 return -EAFNOSUPPORT;
1598 1803
1599 afinfo->decode_session(skb, fl); 1804 afinfo->decode_session(skb, fl, reverse);
1600 err = security_xfrm_decode_session(skb, &fl->secid); 1805 err = security_xfrm_decode_session(skb, &fl->secid);
1601 xfrm_policy_put_afinfo(afinfo); 1806 xfrm_policy_put_afinfo(afinfo);
1602 return err; 1807 return err;
1603} 1808}
1604EXPORT_SYMBOL(xfrm_decode_session); 1809EXPORT_SYMBOL(__xfrm_decode_session);
1605 1810
1606static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) 1811static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1607{ 1812{
@@ -1623,12 +1828,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1623 int npols = 0; 1828 int npols = 0;
1624 int xfrm_nr; 1829 int xfrm_nr;
1625 int pi; 1830 int pi;
1831 int reverse;
1626 struct flowi fl; 1832 struct flowi fl;
1627 u8 fl_dir = policy_to_flow_dir(dir); 1833 u8 fl_dir;
1628 int xerr_idx = -1; 1834 int xerr_idx = -1;
1629 1835
1630 if (xfrm_decode_session(skb, &fl, family) < 0) 1836 reverse = dir & ~XFRM_POLICY_MASK;
1837 dir &= XFRM_POLICY_MASK;
1838 fl_dir = policy_to_flow_dir(dir);
1839
1840 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1841 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1631 return 0; 1842 return 0;
1843 }
1844
1632 nf_nat_decode_session(skb, &fl, family); 1845 nf_nat_decode_session(skb, &fl, family);
1633 1846
1634 /* First, check used SA against their selectors. */ 1847 /* First, check used SA against their selectors. */
@@ -1637,28 +1850,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1637 1850
1638 for (i=skb->sp->len-1; i>=0; i--) { 1851 for (i=skb->sp->len-1; i>=0; i--) {
1639 struct xfrm_state *x = skb->sp->xvec[i]; 1852 struct xfrm_state *x = skb->sp->xvec[i];
1640 if (!xfrm_selector_match(&x->sel, &fl, family)) 1853 if (!xfrm_selector_match(&x->sel, &fl, family)) {
1854 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1641 return 0; 1855 return 0;
1856 }
1642 } 1857 }
1643 } 1858 }
1644 1859
1645 pol = NULL; 1860 pol = NULL;
1646 if (sk && sk->sk_policy[dir]) { 1861 if (sk && sk->sk_policy[dir]) {
1647 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 1862 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1648 if (IS_ERR(pol)) 1863 if (IS_ERR(pol)) {
1864 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1649 return 0; 1865 return 0;
1866 }
1650 } 1867 }
1651 1868
1652 if (!pol) 1869 if (!pol)
1653 pol = flow_cache_lookup(&fl, family, fl_dir, 1870 pol = flow_cache_lookup(&fl, family, fl_dir,
1654 xfrm_policy_lookup); 1871 xfrm_policy_lookup);
1655 1872
1656 if (IS_ERR(pol)) 1873 if (IS_ERR(pol)) {
1874 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1657 return 0; 1875 return 0;
1876 }
1658 1877
1659 if (!pol) { 1878 if (!pol) {
1660 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { 1879 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1661 xfrm_secpath_reject(xerr_idx, skb, &fl); 1880 xfrm_secpath_reject(xerr_idx, skb, &fl);
1881 XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1662 return 0; 1882 return 0;
1663 } 1883 }
1664 return 1; 1884 return 1;
@@ -1674,8 +1894,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1674 &fl, family, 1894 &fl, family,
1675 XFRM_POLICY_IN); 1895 XFRM_POLICY_IN);
1676 if (pols[1]) { 1896 if (pols[1]) {
1677 if (IS_ERR(pols[1])) 1897 if (IS_ERR(pols[1])) {
1898 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1678 return 0; 1899 return 0;
1900 }
1679 pols[1]->curlft.use_time = get_seconds(); 1901 pols[1]->curlft.use_time = get_seconds();
1680 npols ++; 1902 npols ++;
1681 } 1903 }
@@ -1696,10 +1918,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1696 1918
1697 for (pi = 0; pi < npols; pi++) { 1919 for (pi = 0; pi < npols; pi++) {
1698 if (pols[pi] != pol && 1920 if (pols[pi] != pol &&
1699 pols[pi]->action != XFRM_POLICY_ALLOW) 1921 pols[pi]->action != XFRM_POLICY_ALLOW) {
1922 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1700 goto reject; 1923 goto reject;
1701 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) 1924 }
1925 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1926 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1702 goto reject_error; 1927 goto reject_error;
1928 }
1703 for (i = 0; i < pols[pi]->xfrm_nr; i++) 1929 for (i = 0; i < pols[pi]->xfrm_nr; i++)
1704 tpp[ti++] = &pols[pi]->xfrm_vec[i]; 1930 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1705 } 1931 }
@@ -1721,16 +1947,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1721 if (k < -1) 1947 if (k < -1)
1722 /* "-2 - errored_index" returned */ 1948 /* "-2 - errored_index" returned */
1723 xerr_idx = -(2+k); 1949 xerr_idx = -(2+k);
1950 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1724 goto reject; 1951 goto reject;
1725 } 1952 }
1726 } 1953 }
1727 1954
1728 if (secpath_has_nontransport(sp, k, &xerr_idx)) 1955 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1956 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1729 goto reject; 1957 goto reject;
1958 }
1730 1959
1731 xfrm_pols_put(pols, npols); 1960 xfrm_pols_put(pols, npols);
1732 return 1; 1961 return 1;
1733 } 1962 }
1963 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1734 1964
1735reject: 1965reject:
1736 xfrm_secpath_reject(xerr_idx, skb, &fl); 1966 xfrm_secpath_reject(xerr_idx, skb, &fl);
@@ -1744,8 +1974,11 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1744{ 1974{
1745 struct flowi fl; 1975 struct flowi fl;
1746 1976
1747 if (xfrm_decode_session(skb, &fl, family) < 0) 1977 if (xfrm_decode_session(skb, &fl, family) < 0) {
1978 /* XXX: we should have something like FWDHDRERROR here. */
1979 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1748 return 0; 1980 return 0;
1981 }
1749 1982
1750 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; 1983 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1751} 1984}
@@ -1789,7 +2022,7 @@ static int stale_bundle(struct dst_entry *dst)
1789void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2022void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1790{ 2023{
1791 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2024 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
1792 dst->dev = init_net.loopback_dev; 2025 dst->dev = dev->nd_net->loopback_dev;
1793 dev_hold(dst->dev); 2026 dev_hold(dst->dev);
1794 dev_put(dev); 2027 dev_put(dev);
1795 } 2028 }
@@ -1878,7 +2111,7 @@ static int xfrm_flush_bundles(void)
1878 return 0; 2111 return 0;
1879} 2112}
1880 2113
1881void xfrm_init_pmtu(struct dst_entry *dst) 2114static void xfrm_init_pmtu(struct dst_entry *dst)
1882{ 2115{
1883 do { 2116 do {
1884 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2117 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1899,8 +2132,6 @@ void xfrm_init_pmtu(struct dst_entry *dst)
1899 } while ((dst = dst->next)); 2132 } while ((dst = dst->next));
1900} 2133}
1901 2134
1902EXPORT_SYMBOL(xfrm_init_pmtu);
1903
1904/* Check that the bundle accepts the flow and its components are 2135/* Check that the bundle accepts the flow and its components are
1905 * still valid. 2136 * still valid.
1906 */ 2137 */
@@ -2078,6 +2309,16 @@ static struct notifier_block xfrm_dev_notifier = {
2078 0 2309 0
2079}; 2310};
2080 2311
2312#ifdef CONFIG_XFRM_STATISTICS
2313static int __init xfrm_statistics_init(void)
2314{
2315 if (snmp_mib_init((void **)xfrm_statistics,
2316 sizeof(struct linux_xfrm_mib)) < 0)
2317 return -ENOMEM;
2318 return 0;
2319}
2320#endif
2321
2081static void __init xfrm_policy_init(void) 2322static void __init xfrm_policy_init(void)
2082{ 2323{
2083 unsigned int hmask, sz; 2324 unsigned int hmask, sz;
@@ -2114,71 +2355,81 @@ static void __init xfrm_policy_init(void)
2114 2355
2115void __init xfrm_init(void) 2356void __init xfrm_init(void)
2116{ 2357{
2358#ifdef CONFIG_XFRM_STATISTICS
2359 xfrm_statistics_init();
2360#endif
2117 xfrm_state_init(); 2361 xfrm_state_init();
2118 xfrm_policy_init(); 2362 xfrm_policy_init();
2119 xfrm_input_init(); 2363 xfrm_input_init();
2364#ifdef CONFIG_XFRM_STATISTICS
2365 xfrm_proc_init();
2366#endif
2120} 2367}
2121 2368
2122#ifdef CONFIG_AUDITSYSCALL 2369#ifdef CONFIG_AUDITSYSCALL
2123static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, 2370static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2124 struct audit_buffer *audit_buf) 2371 struct audit_buffer *audit_buf)
2125{ 2372{
2126 if (xp->security) 2373 struct xfrm_sec_ctx *ctx = xp->security;
2374 struct xfrm_selector *sel = &xp->selector;
2375
2376 if (ctx)
2127 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 2377 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2128 xp->security->ctx_alg, xp->security->ctx_doi, 2378 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2129 xp->security->ctx_str);
2130 2379
2131 switch(xp->selector.family) { 2380 switch(sel->family) {
2132 case AF_INET: 2381 case AF_INET:
2133 audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", 2382 audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2134 NIPQUAD(xp->selector.saddr.a4), 2383 NIPQUAD(sel->saddr.a4));
2135 NIPQUAD(xp->selector.daddr.a4)); 2384 if (sel->prefixlen_s != 32)
2385 audit_log_format(audit_buf, " src_prefixlen=%d",
2386 sel->prefixlen_s);
2387 audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2388 NIPQUAD(sel->daddr.a4));
2389 if (sel->prefixlen_d != 32)
2390 audit_log_format(audit_buf, " dst_prefixlen=%d",
2391 sel->prefixlen_d);
2136 break; 2392 break;
2137 case AF_INET6: 2393 case AF_INET6:
2138 { 2394 audit_log_format(audit_buf, " src=" NIP6_FMT,
2139 struct in6_addr saddr6, daddr6; 2395 NIP6(*(struct in6_addr *)sel->saddr.a6));
2140 2396 if (sel->prefixlen_s != 128)
2141 memcpy(&saddr6, xp->selector.saddr.a6, 2397 audit_log_format(audit_buf, " src_prefixlen=%d",
2142 sizeof(struct in6_addr)); 2398 sel->prefixlen_s);
2143 memcpy(&daddr6, xp->selector.daddr.a6, 2399 audit_log_format(audit_buf, " dst=" NIP6_FMT,
2144 sizeof(struct in6_addr)); 2400 NIP6(*(struct in6_addr *)sel->daddr.a6));
2145 audit_log_format(audit_buf, 2401 if (sel->prefixlen_d != 128)
2146 " src=" NIP6_FMT " dst=" NIP6_FMT, 2402 audit_log_format(audit_buf, " dst_prefixlen=%d",
2147 NIP6(saddr6), NIP6(daddr6)); 2403 sel->prefixlen_d);
2148 }
2149 break; 2404 break;
2150 } 2405 }
2151} 2406}
2152 2407
2153void 2408void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2154xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid) 2409 u32 auid, u32 secid)
2155{ 2410{
2156 struct audit_buffer *audit_buf; 2411 struct audit_buffer *audit_buf;
2157 extern int audit_enabled;
2158 2412
2159 if (audit_enabled == 0) 2413 audit_buf = xfrm_audit_start("SPD-add");
2160 return;
2161 audit_buf = xfrm_audit_start(sid, auid);
2162 if (audit_buf == NULL) 2414 if (audit_buf == NULL)
2163 return; 2415 return;
2164 audit_log_format(audit_buf, " op=SPD-add res=%u", result); 2416 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2417 audit_log_format(audit_buf, " res=%u", result);
2165 xfrm_audit_common_policyinfo(xp, audit_buf); 2418 xfrm_audit_common_policyinfo(xp, audit_buf);
2166 audit_log_end(audit_buf); 2419 audit_log_end(audit_buf);
2167} 2420}
2168EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); 2421EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2169 2422
2170void 2423void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2171xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid) 2424 u32 auid, u32 secid)
2172{ 2425{
2173 struct audit_buffer *audit_buf; 2426 struct audit_buffer *audit_buf;
2174 extern int audit_enabled;
2175 2427
2176 if (audit_enabled == 0) 2428 audit_buf = xfrm_audit_start("SPD-delete");
2177 return;
2178 audit_buf = xfrm_audit_start(sid, auid);
2179 if (audit_buf == NULL) 2429 if (audit_buf == NULL)
2180 return; 2430 return;
2181 audit_log_format(audit_buf, " op=SPD-delete res=%u", result); 2431 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2432 audit_log_format(audit_buf, " res=%u", result);
2182 xfrm_audit_common_policyinfo(xp, audit_buf); 2433 xfrm_audit_common_policyinfo(xp, audit_buf);
2183 audit_log_end(audit_buf); 2434 audit_log_end(audit_buf);
2184} 2435}
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
new file mode 100644
index 000000000000..2b0db13f0cda
--- /dev/null
+++ b/net/xfrm/xfrm_proc.c
@@ -0,0 +1,97 @@
1/*
2 * xfrm_proc.c
3 *
4 * Copyright (C)2006-2007 USAGI/WIDE Project
5 *
6 * Authors: Masahide NAKAMURA <nakam@linux-ipv6.org>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <linux/proc_fs.h>
14#include <linux/seq_file.h>
15#include <net/snmp.h>
16#include <net/xfrm.h>
17
18static struct snmp_mib xfrm_mib_list[] = {
19 SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR),
20 SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR),
21 SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR),
22 SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES),
23 SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR),
24 SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR),
25 SNMP_MIB_ITEM("XfrmInStateSeqError", LINUX_MIB_XFRMINSTATESEQERROR),
26 SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED),
27 SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH),
28 SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID),
29 SNMP_MIB_ITEM("XfrmInTmplMismatch", LINUX_MIB_XFRMINTMPLMISMATCH),
30 SNMP_MIB_ITEM("XfrmInNoPols", LINUX_MIB_XFRMINNOPOLS),
31 SNMP_MIB_ITEM("XfrmInPolBlock", LINUX_MIB_XFRMINPOLBLOCK),
32 SNMP_MIB_ITEM("XfrmInPolError", LINUX_MIB_XFRMINPOLERROR),
33 SNMP_MIB_ITEM("XfrmOutError", LINUX_MIB_XFRMOUTERROR),
34 SNMP_MIB_ITEM("XfrmOutBundleGenError", LINUX_MIB_XFRMOUTBUNDLEGENERROR),
35 SNMP_MIB_ITEM("XfrmOutBundleCheckError", LINUX_MIB_XFRMOUTBUNDLECHECKERROR),
36 SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES),
37 SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR),
38 SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR),
39 SNMP_MIB_ITEM("XfrmOutStateSeqError", LINUX_MIB_XFRMOUTSTATESEQERROR),
40 SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED),
41 SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK),
42 SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
43 SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
44 SNMP_MIB_SENTINEL
45};
46
47static unsigned long
48fold_field(void *mib[], int offt)
49{
50 unsigned long res = 0;
51 int i;
52
53 for_each_possible_cpu(i) {
54 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
55 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
56 }
57 return res;
58}
59
60static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
61{
62 int i;
63 for (i=0; xfrm_mib_list[i].name; i++)
64 seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
65 fold_field((void **)xfrm_statistics,
66 xfrm_mib_list[i].entry));
67 return 0;
68}
69
70static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
71{
72 return single_open(file, xfrm_statistics_seq_show, NULL);
73}
74
75static struct file_operations xfrm_statistics_seq_fops = {
76 .owner = THIS_MODULE,
77 .open = xfrm_statistics_seq_open,
78 .read = seq_read,
79 .llseek = seq_lseek,
80 .release = single_release,
81};
82
83int __init xfrm_proc_init(void)
84{
85 int rc = 0;
86
87 if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO,
88 &xfrm_statistics_seq_fops))
89 goto stat_fail;
90
91 out:
92 return rc;
93
94 stat_fail:
95 rc = -ENOMEM;
96 goto out;
97}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 224b44e31a07..7ba65e82941c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -19,6 +19,7 @@
19#include <linux/ipsec.h> 19#include <linux/ipsec.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/cache.h> 21#include <linux/cache.h>
22#include <linux/audit.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
24#include "xfrm_hash.h" 25#include "xfrm_hash.h"
@@ -60,6 +61,13 @@ static unsigned int xfrm_state_genid;
60static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); 61static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
61static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 62static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
62 63
64#ifdef CONFIG_AUDITSYSCALL
65static void xfrm_audit_state_replay(struct xfrm_state *x,
66 struct sk_buff *skb, __be32 net_seq);
67#else
68#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0)
69#endif /* CONFIG_AUDITSYSCALL */
70
63static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, 71static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
64 xfrm_address_t *saddr, 72 xfrm_address_t *saddr,
65 u32 reqid, 73 u32 reqid,
@@ -203,14 +211,15 @@ static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
203} 211}
204 212
205static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) 213static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
214 __releases(xfrm_state_afinfo_lock)
206{ 215{
207 write_unlock_bh(&xfrm_state_afinfo_lock); 216 write_unlock_bh(&xfrm_state_afinfo_lock);
208} 217}
209 218
210int xfrm_register_type(struct xfrm_type *type, unsigned short family) 219int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
211{ 220{
212 struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); 221 struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
213 struct xfrm_type **typemap; 222 const struct xfrm_type **typemap;
214 int err = 0; 223 int err = 0;
215 224
216 if (unlikely(afinfo == NULL)) 225 if (unlikely(afinfo == NULL))
@@ -226,10 +235,10 @@ int xfrm_register_type(struct xfrm_type *type, unsigned short family)
226} 235}
227EXPORT_SYMBOL(xfrm_register_type); 236EXPORT_SYMBOL(xfrm_register_type);
228 237
229int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) 238int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
230{ 239{
231 struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); 240 struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
232 struct xfrm_type **typemap; 241 const struct xfrm_type **typemap;
233 int err = 0; 242 int err = 0;
234 243
235 if (unlikely(afinfo == NULL)) 244 if (unlikely(afinfo == NULL))
@@ -245,11 +254,11 @@ int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
245} 254}
246EXPORT_SYMBOL(xfrm_unregister_type); 255EXPORT_SYMBOL(xfrm_unregister_type);
247 256
248static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) 257static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
249{ 258{
250 struct xfrm_state_afinfo *afinfo; 259 struct xfrm_state_afinfo *afinfo;
251 struct xfrm_type **typemap; 260 const struct xfrm_type **typemap;
252 struct xfrm_type *type; 261 const struct xfrm_type *type;
253 int modload_attempted = 0; 262 int modload_attempted = 0;
254 263
255retry: 264retry:
@@ -272,7 +281,7 @@ retry:
272 return type; 281 return type;
273} 282}
274 283
275static void xfrm_put_type(struct xfrm_type *type) 284static void xfrm_put_type(const struct xfrm_type *type)
276{ 285{
277 module_put(type->owner); 286 module_put(type->owner);
278} 287}
@@ -484,7 +493,7 @@ expired:
484 km_state_expired(x, 1, 0); 493 km_state_expired(x, 1, 0);
485 494
486 xfrm_audit_state_delete(x, err ? 0 : 1, 495 xfrm_audit_state_delete(x, err ? 0 : 1,
487 audit_get_loginuid(current->audit_context), 0); 496 audit_get_loginuid(current), 0);
488 497
489out: 498out:
490 spin_unlock(&x->lock); 499 spin_unlock(&x->lock);
@@ -504,12 +513,9 @@ struct xfrm_state *xfrm_state_alloc(void)
504 INIT_HLIST_NODE(&x->bydst); 513 INIT_HLIST_NODE(&x->bydst);
505 INIT_HLIST_NODE(&x->bysrc); 514 INIT_HLIST_NODE(&x->bysrc);
506 INIT_HLIST_NODE(&x->byspi); 515 INIT_HLIST_NODE(&x->byspi);
507 init_timer(&x->timer); 516 setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
508 x->timer.function = xfrm_timer_handler; 517 setup_timer(&x->rtimer, xfrm_replay_timer_handler,
509 x->timer.data = (unsigned long)x; 518 (unsigned long)x);
510 init_timer(&x->rtimer);
511 x->rtimer.function = xfrm_replay_timer_handler;
512 x->rtimer.data = (unsigned long)x;
513 x->curlft.add_time = get_seconds(); 519 x->curlft.add_time = get_seconds();
514 x->lft.soft_byte_limit = XFRM_INF; 520 x->lft.soft_byte_limit = XFRM_INF;
515 x->lft.soft_packet_limit = XFRM_INF; 521 x->lft.soft_packet_limit = XFRM_INF;
@@ -552,7 +558,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
552 * The xfrm_state_alloc call gives a reference, and that 558 * The xfrm_state_alloc call gives a reference, and that
553 * is what we are dropping here. 559 * is what we are dropping here.
554 */ 560 */
555 __xfrm_state_put(x); 561 xfrm_state_put(x);
556 err = 0; 562 err = 0;
557 } 563 }
558 564
@@ -759,7 +765,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
759 struct xfrm_policy *pol, int *err, 765 struct xfrm_policy *pol, int *err,
760 unsigned short family) 766 unsigned short family)
761{ 767{
762 unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); 768 unsigned int h;
763 struct hlist_node *entry; 769 struct hlist_node *entry;
764 struct xfrm_state *x, *x0; 770 struct xfrm_state *x, *x0;
765 int acquire_in_progress = 0; 771 int acquire_in_progress = 0;
@@ -767,6 +773,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
767 struct xfrm_state *best = NULL; 773 struct xfrm_state *best = NULL;
768 774
769 spin_lock_bh(&xfrm_state_lock); 775 spin_lock_bh(&xfrm_state_lock);
776 h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
770 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { 777 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
771 if (x->props.family == family && 778 if (x->props.family == family &&
772 x->props.reqid == tmpl->reqid && 779 x->props.reqid == tmpl->reqid &&
@@ -868,11 +875,12 @@ struct xfrm_state *
868xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 875xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
869 unsigned short family, u8 mode, u8 proto, u32 reqid) 876 unsigned short family, u8 mode, u8 proto, u32 reqid)
870{ 877{
871 unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); 878 unsigned int h;
872 struct xfrm_state *rx = NULL, *x = NULL; 879 struct xfrm_state *rx = NULL, *x = NULL;
873 struct hlist_node *entry; 880 struct hlist_node *entry;
874 881
875 spin_lock(&xfrm_state_lock); 882 spin_lock(&xfrm_state_lock);
883 h = xfrm_dst_hash(daddr, saddr, reqid, family);
876 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { 884 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
877 if (x->props.family == family && 885 if (x->props.family == family &&
878 x->props.reqid == reqid && 886 x->props.reqid == reqid &&
@@ -1092,7 +1100,7 @@ out:
1092EXPORT_SYMBOL(xfrm_state_add); 1100EXPORT_SYMBOL(xfrm_state_add);
1093 1101
1094#ifdef CONFIG_XFRM_MIGRATE 1102#ifdef CONFIG_XFRM_MIGRATE
1095struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) 1103static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
1096{ 1104{
1097 int err = -ENOMEM; 1105 int err = -ENOMEM;
1098 struct xfrm_state *x = xfrm_state_alloc(); 1106 struct xfrm_state *x = xfrm_state_alloc();
@@ -1167,7 +1175,6 @@ struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
1167 kfree(x); 1175 kfree(x);
1168 return NULL; 1176 return NULL;
1169} 1177}
1170EXPORT_SYMBOL(xfrm_state_clone);
1171 1178
1172/* xfrm_state_lock is held */ 1179/* xfrm_state_lock is held */
1173struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) 1180struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
@@ -1609,13 +1616,14 @@ static void xfrm_replay_timer_handler(unsigned long data)
1609 spin_unlock(&x->lock); 1616 spin_unlock(&x->lock);
1610} 1617}
1611 1618
1612int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq) 1619int xfrm_replay_check(struct xfrm_state *x,
1620 struct sk_buff *skb, __be32 net_seq)
1613{ 1621{
1614 u32 diff; 1622 u32 diff;
1615 u32 seq = ntohl(net_seq); 1623 u32 seq = ntohl(net_seq);
1616 1624
1617 if (unlikely(seq == 0)) 1625 if (unlikely(seq == 0))
1618 return -EINVAL; 1626 goto err;
1619 1627
1620 if (likely(seq > x->replay.seq)) 1628 if (likely(seq > x->replay.seq))
1621 return 0; 1629 return 0;
@@ -1624,16 +1632,19 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1624 if (diff >= min_t(unsigned int, x->props.replay_window, 1632 if (diff >= min_t(unsigned int, x->props.replay_window,
1625 sizeof(x->replay.bitmap) * 8)) { 1633 sizeof(x->replay.bitmap) * 8)) {
1626 x->stats.replay_window++; 1634 x->stats.replay_window++;
1627 return -EINVAL; 1635 goto err;
1628 } 1636 }
1629 1637
1630 if (x->replay.bitmap & (1U << diff)) { 1638 if (x->replay.bitmap & (1U << diff)) {
1631 x->stats.replay++; 1639 x->stats.replay++;
1632 return -EINVAL; 1640 goto err;
1633 } 1641 }
1634 return 0; 1642 return 0;
1643
1644err:
1645 xfrm_audit_state_replay(x, skb, net_seq);
1646 return -EINVAL;
1635} 1647}
1636EXPORT_SYMBOL(xfrm_replay_check);
1637 1648
1638void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) 1649void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1639{ 1650{
@@ -1655,9 +1666,8 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1655 if (xfrm_aevent_is_on()) 1666 if (xfrm_aevent_is_on())
1656 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); 1667 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1657} 1668}
1658EXPORT_SYMBOL(xfrm_replay_advance);
1659 1669
1660static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); 1670static LIST_HEAD(xfrm_km_list);
1661static DEFINE_RWLOCK(xfrm_km_lock); 1671static DEFINE_RWLOCK(xfrm_km_lock);
1662 1672
1663void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) 1673void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -1749,6 +1759,7 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1749} 1759}
1750EXPORT_SYMBOL(km_policy_expired); 1760EXPORT_SYMBOL(km_policy_expired);
1751 1761
1762#ifdef CONFIG_XFRM_MIGRATE
1752int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 1763int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1753 struct xfrm_migrate *m, int num_migrate) 1764 struct xfrm_migrate *m, int num_migrate)
1754{ 1765{
@@ -1768,6 +1779,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1768 return err; 1779 return err;
1769} 1780}
1770EXPORT_SYMBOL(km_migrate); 1781EXPORT_SYMBOL(km_migrate);
1782#endif
1771 1783
1772int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) 1784int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1773{ 1785{
@@ -1895,6 +1907,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
1895} 1907}
1896 1908
1897static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) 1909static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1910 __releases(xfrm_state_afinfo_lock)
1898{ 1911{
1899 read_unlock(&xfrm_state_afinfo_lock); 1912 read_unlock(&xfrm_state_afinfo_lock);
1900} 1913}
@@ -1994,71 +2007,172 @@ void __init xfrm_state_init(void)
1994} 2007}
1995 2008
1996#ifdef CONFIG_AUDITSYSCALL 2009#ifdef CONFIG_AUDITSYSCALL
1997static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x, 2010static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
1998 struct audit_buffer *audit_buf) 2011 struct audit_buffer *audit_buf)
1999{ 2012{
2000 if (x->security) 2013 struct xfrm_sec_ctx *ctx = x->security;
2014 u32 spi = ntohl(x->id.spi);
2015
2016 if (ctx)
2001 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 2017 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2002 x->security->ctx_alg, x->security->ctx_doi, 2018 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2003 x->security->ctx_str);
2004 2019
2005 switch(x->props.family) { 2020 switch(x->props.family) {
2006 case AF_INET: 2021 case AF_INET:
2007 audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", 2022 audit_log_format(audit_buf,
2023 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
2008 NIPQUAD(x->props.saddr.a4), 2024 NIPQUAD(x->props.saddr.a4),
2009 NIPQUAD(x->id.daddr.a4)); 2025 NIPQUAD(x->id.daddr.a4));
2010 break; 2026 break;
2011 case AF_INET6: 2027 case AF_INET6:
2012 { 2028 audit_log_format(audit_buf,
2013 struct in6_addr saddr6, daddr6; 2029 " src=" NIP6_FMT " dst=" NIP6_FMT,
2014 2030 NIP6(*(struct in6_addr *)x->props.saddr.a6),
2015 memcpy(&saddr6, x->props.saddr.a6, 2031 NIP6(*(struct in6_addr *)x->id.daddr.a6));
2016 sizeof(struct in6_addr)); 2032 break;
2017 memcpy(&daddr6, x->id.daddr.a6, 2033 }
2018 sizeof(struct in6_addr)); 2034
2019 audit_log_format(audit_buf, 2035 audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2020 " src=" NIP6_FMT " dst=" NIP6_FMT, 2036}
2021 NIP6(saddr6), NIP6(daddr6)); 2037
2022 } 2038static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2039 struct audit_buffer *audit_buf)
2040{
2041 struct iphdr *iph4;
2042 struct ipv6hdr *iph6;
2043
2044 switch (family) {
2045 case AF_INET:
2046 iph4 = ip_hdr(skb);
2047 audit_log_format(audit_buf,
2048 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
2049 NIPQUAD(iph4->saddr),
2050 NIPQUAD(iph4->daddr));
2051 break;
2052 case AF_INET6:
2053 iph6 = ipv6_hdr(skb);
2054 audit_log_format(audit_buf,
2055 " src=" NIP6_FMT " dst=" NIP6_FMT
2056 " flowlbl=0x%x%x%x",
2057 NIP6(iph6->saddr),
2058 NIP6(iph6->daddr),
2059 iph6->flow_lbl[0] & 0x0f,
2060 iph6->flow_lbl[1],
2061 iph6->flow_lbl[2]);
2023 break; 2062 break;
2024 } 2063 }
2025} 2064}
2026 2065
2027void 2066void xfrm_audit_state_add(struct xfrm_state *x, int result,
2028xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid) 2067 u32 auid, u32 secid)
2029{ 2068{
2030 struct audit_buffer *audit_buf; 2069 struct audit_buffer *audit_buf;
2031 extern int audit_enabled;
2032 2070
2033 if (audit_enabled == 0) 2071 audit_buf = xfrm_audit_start("SAD-add");
2034 return;
2035 audit_buf = xfrm_audit_start(sid, auid);
2036 if (audit_buf == NULL) 2072 if (audit_buf == NULL)
2037 return; 2073 return;
2038 audit_log_format(audit_buf, " op=SAD-add res=%u",result); 2074 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2039 xfrm_audit_common_stateinfo(x, audit_buf); 2075 xfrm_audit_helper_sainfo(x, audit_buf);
2040 audit_log_format(audit_buf, " spi=%lu(0x%lx)", 2076 audit_log_format(audit_buf, " res=%u", result);
2041 (unsigned long)x->id.spi, (unsigned long)x->id.spi);
2042 audit_log_end(audit_buf); 2077 audit_log_end(audit_buf);
2043} 2078}
2044EXPORT_SYMBOL_GPL(xfrm_audit_state_add); 2079EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2045 2080
2046void 2081void xfrm_audit_state_delete(struct xfrm_state *x, int result,
2047xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid) 2082 u32 auid, u32 secid)
2048{ 2083{
2049 struct audit_buffer *audit_buf; 2084 struct audit_buffer *audit_buf;
2050 extern int audit_enabled;
2051 2085
2052 if (audit_enabled == 0) 2086 audit_buf = xfrm_audit_start("SAD-delete");
2053 return;
2054 audit_buf = xfrm_audit_start(sid, auid);
2055 if (audit_buf == NULL) 2087 if (audit_buf == NULL)
2056 return; 2088 return;
2057 audit_log_format(audit_buf, " op=SAD-delete res=%u",result); 2089 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2058 xfrm_audit_common_stateinfo(x, audit_buf); 2090 xfrm_audit_helper_sainfo(x, audit_buf);
2059 audit_log_format(audit_buf, " spi=%lu(0x%lx)", 2091 audit_log_format(audit_buf, " res=%u", result);
2060 (unsigned long)x->id.spi, (unsigned long)x->id.spi);
2061 audit_log_end(audit_buf); 2092 audit_log_end(audit_buf);
2062} 2093}
2063EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); 2094EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2095
2096void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2097 struct sk_buff *skb)
2098{
2099 struct audit_buffer *audit_buf;
2100 u32 spi;
2101
2102 audit_buf = xfrm_audit_start("SA-replay-overflow");
2103 if (audit_buf == NULL)
2104 return;
2105 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2106 /* don't record the sequence number because it's inherent in this kind
2107 * of audit message */
2108 spi = ntohl(x->id.spi);
2109 audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2110 audit_log_end(audit_buf);
2111}
2112EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2113
2114static void xfrm_audit_state_replay(struct xfrm_state *x,
2115 struct sk_buff *skb, __be32 net_seq)
2116{
2117 struct audit_buffer *audit_buf;
2118 u32 spi;
2119
2120 audit_buf = xfrm_audit_start("SA-replayed-pkt");
2121 if (audit_buf == NULL)
2122 return;
2123 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2124 spi = ntohl(x->id.spi);
2125 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2126 spi, spi, ntohl(net_seq));
2127 audit_log_end(audit_buf);
2128}
2129
2130void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2131{
2132 struct audit_buffer *audit_buf;
2133
2134 audit_buf = xfrm_audit_start("SA-notfound");
2135 if (audit_buf == NULL)
2136 return;
2137 xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2138 audit_log_end(audit_buf);
2139}
2140EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2141
2142void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2143 __be32 net_spi, __be32 net_seq)
2144{
2145 struct audit_buffer *audit_buf;
2146 u32 spi;
2147
2148 audit_buf = xfrm_audit_start("SA-notfound");
2149 if (audit_buf == NULL)
2150 return;
2151 xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2152 spi = ntohl(net_spi);
2153 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2154 spi, spi, ntohl(net_seq));
2155 audit_log_end(audit_buf);
2156}
2157EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2158
2159void xfrm_audit_state_icvfail(struct xfrm_state *x,
2160 struct sk_buff *skb, u8 proto)
2161{
2162 struct audit_buffer *audit_buf;
2163 __be32 net_spi;
2164 __be32 net_seq;
2165
2166 audit_buf = xfrm_audit_start("SA-icv-failure");
2167 if (audit_buf == NULL)
2168 return;
2169 xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2170 if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2171 u32 spi = ntohl(net_spi);
2172 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2173 spi, spi, ntohl(net_seq));
2174 }
2175 audit_log_end(audit_buf);
2176}
2177EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2064#endif /* CONFIG_AUDITSYSCALL */ 2178#endif /* CONFIG_AUDITSYSCALL */
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d41588d101d0..78338079b7f5 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,7 +31,7 @@
31#include <linux/in6.h> 31#include <linux/in6.h>
32#endif 32#endif
33 33
34static inline int alg_len(struct xfrm_algo *alg) 34static inline int aead_len(struct xfrm_algo_aead *alg)
35{ 35{
36 return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); 36 return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
37} 37}
@@ -45,7 +45,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
45 return 0; 45 return 0;
46 46
47 algp = nla_data(rt); 47 algp = nla_data(rt);
48 if (nla_len(rt) < alg_len(algp)) 48 if (nla_len(rt) < xfrm_alg_len(algp))
49 return -EINVAL; 49 return -EINVAL;
50 50
51 switch (type) { 51 switch (type) {
@@ -73,6 +73,22 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
73 return 0; 73 return 0;
74} 74}
75 75
76static int verify_aead(struct nlattr **attrs)
77{
78 struct nlattr *rt = attrs[XFRMA_ALG_AEAD];
79 struct xfrm_algo_aead *algp;
80
81 if (!rt)
82 return 0;
83
84 algp = nla_data(rt);
85 if (nla_len(rt) < aead_len(algp))
86 return -EINVAL;
87
88 algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
89 return 0;
90}
91
76static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, 92static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type,
77 xfrm_address_t **addrp) 93 xfrm_address_t **addrp)
78{ 94{
@@ -124,20 +140,28 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
124 switch (p->id.proto) { 140 switch (p->id.proto) {
125 case IPPROTO_AH: 141 case IPPROTO_AH:
126 if (!attrs[XFRMA_ALG_AUTH] || 142 if (!attrs[XFRMA_ALG_AUTH] ||
143 attrs[XFRMA_ALG_AEAD] ||
127 attrs[XFRMA_ALG_CRYPT] || 144 attrs[XFRMA_ALG_CRYPT] ||
128 attrs[XFRMA_ALG_COMP]) 145 attrs[XFRMA_ALG_COMP])
129 goto out; 146 goto out;
130 break; 147 break;
131 148
132 case IPPROTO_ESP: 149 case IPPROTO_ESP:
133 if ((!attrs[XFRMA_ALG_AUTH] && 150 if (attrs[XFRMA_ALG_COMP])
134 !attrs[XFRMA_ALG_CRYPT]) || 151 goto out;
135 attrs[XFRMA_ALG_COMP]) 152 if (!attrs[XFRMA_ALG_AUTH] &&
153 !attrs[XFRMA_ALG_CRYPT] &&
154 !attrs[XFRMA_ALG_AEAD])
155 goto out;
156 if ((attrs[XFRMA_ALG_AUTH] ||
157 attrs[XFRMA_ALG_CRYPT]) &&
158 attrs[XFRMA_ALG_AEAD])
136 goto out; 159 goto out;
137 break; 160 break;
138 161
139 case IPPROTO_COMP: 162 case IPPROTO_COMP:
140 if (!attrs[XFRMA_ALG_COMP] || 163 if (!attrs[XFRMA_ALG_COMP] ||
164 attrs[XFRMA_ALG_AEAD] ||
141 attrs[XFRMA_ALG_AUTH] || 165 attrs[XFRMA_ALG_AUTH] ||
142 attrs[XFRMA_ALG_CRYPT]) 166 attrs[XFRMA_ALG_CRYPT])
143 goto out; 167 goto out;
@@ -148,6 +172,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
148 case IPPROTO_ROUTING: 172 case IPPROTO_ROUTING:
149 if (attrs[XFRMA_ALG_COMP] || 173 if (attrs[XFRMA_ALG_COMP] ||
150 attrs[XFRMA_ALG_AUTH] || 174 attrs[XFRMA_ALG_AUTH] ||
175 attrs[XFRMA_ALG_AEAD] ||
151 attrs[XFRMA_ALG_CRYPT] || 176 attrs[XFRMA_ALG_CRYPT] ||
152 attrs[XFRMA_ENCAP] || 177 attrs[XFRMA_ENCAP] ||
153 attrs[XFRMA_SEC_CTX] || 178 attrs[XFRMA_SEC_CTX] ||
@@ -160,6 +185,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
160 goto out; 185 goto out;
161 } 186 }
162 187
188 if ((err = verify_aead(attrs)))
189 goto out;
163 if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) 190 if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH)))
164 goto out; 191 goto out;
165 if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) 192 if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT)))
@@ -204,7 +231,32 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
204 return -ENOSYS; 231 return -ENOSYS;
205 *props = algo->desc.sadb_alg_id; 232 *props = algo->desc.sadb_alg_id;
206 233
207 p = kmemdup(ualg, alg_len(ualg), GFP_KERNEL); 234 p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL);
235 if (!p)
236 return -ENOMEM;
237
238 strcpy(p->alg_name, algo->name);
239 *algpp = p;
240 return 0;
241}
242
243static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
244 struct nlattr *rta)
245{
246 struct xfrm_algo_aead *p, *ualg;
247 struct xfrm_algo_desc *algo;
248
249 if (!rta)
250 return 0;
251
252 ualg = nla_data(rta);
253
254 algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1);
255 if (!algo)
256 return -ENOSYS;
257 *props = algo->desc.sadb_alg_id;
258
259 p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL);
208 if (!p) 260 if (!p)
209 return -ENOMEM; 261 return -ENOMEM;
210 262
@@ -291,6 +343,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
291 343
292 copy_from_user_state(x, p); 344 copy_from_user_state(x, p);
293 345
346 if ((err = attach_aead(&x->aead, &x->props.ealgo,
347 attrs[XFRMA_ALG_AEAD])))
348 goto error;
294 if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, 349 if ((err = attach_one_algo(&x->aalg, &x->props.aalgo,
295 xfrm_aalg_get_byname, 350 xfrm_aalg_get_byname,
296 attrs[XFRMA_ALG_AUTH]))) 351 attrs[XFRMA_ALG_AUTH])))
@@ -507,7 +562,6 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
507 struct xfrm_usersa_info *p, 562 struct xfrm_usersa_info *p,
508 struct sk_buff *skb) 563 struct sk_buff *skb)
509{ 564{
510 spin_lock_bh(&x->lock);
511 copy_to_user_state(x, p); 565 copy_to_user_state(x, p);
512 566
513 if (x->coaddr) 567 if (x->coaddr)
@@ -515,12 +569,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
515 569
516 if (x->lastused) 570 if (x->lastused)
517 NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); 571 NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused);
518 spin_unlock_bh(&x->lock);
519 572
573 if (x->aead)
574 NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead);
520 if (x->aalg) 575 if (x->aalg)
521 NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); 576 NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg);
522 if (x->ealg) 577 if (x->ealg)
523 NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); 578 NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg);
524 if (x->calg) 579 if (x->calg)
525 NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); 580 NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
526 581
@@ -1050,7 +1105,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
1050 return xp; 1105 return xp;
1051 error: 1106 error:
1052 *errp = err; 1107 *errp = err;
1053 kfree(xp); 1108 xfrm_policy_destroy(xp);
1054 return NULL; 1109 return NULL;
1055} 1110}
1056 1111
@@ -1815,6 +1870,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
1815#undef XMSGSIZE 1870#undef XMSGSIZE
1816 1871
1817static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { 1872static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
1873 [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) },
1818 [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, 1874 [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) },
1819 [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, 1875 [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) },
1820 [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, 1876 [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
@@ -1979,10 +2035,12 @@ static int xfrm_notify_sa_flush(struct km_event *c)
1979static inline size_t xfrm_sa_len(struct xfrm_state *x) 2035static inline size_t xfrm_sa_len(struct xfrm_state *x)
1980{ 2036{
1981 size_t l = 0; 2037 size_t l = 0;
2038 if (x->aead)
2039 l += nla_total_size(aead_len(x->aead));
1982 if (x->aalg) 2040 if (x->aalg)
1983 l += nla_total_size(alg_len(x->aalg)); 2041 l += nla_total_size(xfrm_alg_len(x->aalg));
1984 if (x->ealg) 2042 if (x->ealg)
1985 l += nla_total_size(alg_len(x->ealg)); 2043 l += nla_total_size(xfrm_alg_len(x->ealg));
1986 if (x->calg) 2044 if (x->calg)
1987 l += nla_total_size(sizeof(*x->calg)); 2045 l += nla_total_size(sizeof(*x->calg));
1988 if (x->encap) 2046 if (x->encap)
@@ -1993,8 +2051,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
1993 if (x->coaddr) 2051 if (x->coaddr)
1994 l += nla_total_size(sizeof(*x->coaddr)); 2052 l += nla_total_size(sizeof(*x->coaddr));
1995 2053
1996 /* Must count this as this may become non-zero behind our back. */ 2054 /* Must count x->lastused as it may become non-zero behind our back. */
1997 l += nla_total_size(sizeof(x->lastused)); 2055 l += nla_total_size(sizeof(u64));
1998 2056
1999 return l; 2057 return l;
2000} 2058}
@@ -2427,7 +2485,7 @@ static void __exit xfrm_user_exit(void)
2427 xfrm_unregister_km(&netlink_mgr); 2485 xfrm_unregister_km(&netlink_mgr);
2428 rcu_assign_pointer(xfrm_nl, NULL); 2486 rcu_assign_pointer(xfrm_nl, NULL);
2429 synchronize_rcu(); 2487 synchronize_rcu();
2430 sock_release(nlsk->sk_socket); 2488 netlink_kernel_release(nlsk);
2431} 2489}
2432 2490
2433module_init(xfrm_user_init); 2491module_init(xfrm_user_init);